[lld] 6836a47 - [DebugInfo] Add error checking around data extraction in DWARFAbbreviationDeclaration::extract
Alex Langford via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 6 10:51:35 PDT 2023
Author: Alex Langford
Date: 2023-06-06T10:50:54-07:00
New Revision: 6836a47b7e6b57927664ec6ec750ae37bb951129
URL: https://github.com/llvm/llvm-project/commit/6836a47b7e6b57927664ec6ec750ae37bb951129
DIFF: https://github.com/llvm/llvm-project/commit/6836a47b7e6b57927664ec6ec750ae37bb951129.diff
LOG: [DebugInfo] Add error checking around data extraction in DWARFAbbreviationDeclaration::extract
In trying to hoist errors further up this callstack, I discovered that
if the data in the debug_abbrev section is invalid entirely, the code
that parses the debug_abbrev section may do strange or unpredictable
things. The underlying issue is that DataExtractor will return a value
of 0 when it encounters an error in extracting a LEB128 value. It's thus
difficult to determine if there was an error just by looking at the
return value. This patch aims to bail at the first sight of an error in
the debug_abbrev parsing code.
Differential Revision: https://reviews.llvm.org/D151755
Added:
Modified:
lld/test/MachO/stabs-dwarf5.s
lld/test/MachO/stabs-icf.s
lld/test/MachO/stabs.s
llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
llvm/unittests/DebugInfo/DWARF/DWARFDebugAbbrevTest.cpp
Removed:
################################################################################
diff --git a/lld/test/MachO/stabs-dwarf5.s b/lld/test/MachO/stabs-dwarf5.s
index 61a0986e67d50..169f17987aab4 100644
--- a/lld/test/MachO/stabs-dwarf5.s
+++ b/lld/test/MachO/stabs-dwarf5.s
@@ -51,6 +51,8 @@ Lsection_abbrev:
.byte 114 ## DW_AT_str_offsets_base
.byte 23 ## DW_FORM_sec_offset
.byte 0 ## EOM(1)
+ .byte 0 ## EOM(2)
+ .byte 0 ## EOM(3)
.section __DWARF,__debug_info,regular,debug
.set Lset0, Ldebug_info_end0-Ldebug_info_start0 ## Length of Unit
.long Lset0
diff --git a/lld/test/MachO/stabs-icf.s b/lld/test/MachO/stabs-icf.s
index 129fc86663df2..99d0871ce4d2c 100644
--- a/lld/test/MachO/stabs-icf.s
+++ b/lld/test/MachO/stabs-icf.s
@@ -64,6 +64,8 @@ Lsection_abbrev:
.byte 18 ## DW_AT_high_pc
.byte 6 ## DW_FORM_data4
.byte 0 ## EOM(1)
+ .byte 0 ## EOM(2)
+ .byte 0 ## EOM(3)
.section __DWARF,__debug_info,regular,debug
.set Lset0, Ldebug_info_end0-Ldebug_info_start0 ## Length of Unit
.long Lset0
diff --git a/lld/test/MachO/stabs.s b/lld/test/MachO/stabs.s
index aa96f7c147a7e..145e7960b6657 100644
--- a/lld/test/MachO/stabs.s
+++ b/lld/test/MachO/stabs.s
@@ -205,6 +205,8 @@ Lsection_abbrev:
.byte 18 ## DW_AT_high_pc
.byte 6 ## DW_FORM_data4
.byte 0 ## EOM(1)
+ .byte 0 ## EOM(2)
+ .byte 0 ## EOM(3)
.section __DWARF,__debug_info,regular,debug
.set Lset0, Ldebug_info_end0-Ldebug_info_start0 ## Length of Unit
.long Lset0
@@ -254,6 +256,8 @@ Lsection_abbrev:
.byte 18 ## DW_AT_high_pc
.byte 6 ## DW_FORM_data4
.byte 0 ## EOM(1)
+ .byte 0 ## EOM(2)
+ .byte 0 ## EOM(3)
.section __DWARF,__debug_info,regular,debug
.set Lset0, Ldebug_info_end0-Ldebug_info_start0 ## Length of Unit
.long Lset0
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp b/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
index e131c42375ab5..11de45db7a4e2 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
@@ -38,18 +38,28 @@ llvm::Expected<DWARFAbbreviationDeclaration::ExtractState>
DWARFAbbreviationDeclaration::extract(DataExtractor Data, uint64_t *OffsetPtr) {
clear();
const uint64_t Offset = *OffsetPtr;
- Code = Data.getULEB128(OffsetPtr);
+ Error Err = Error::success();
+ Code = Data.getULEB128(OffsetPtr, &Err);
+ if (Err)
+ return Err;
+
if (Code == 0)
return ExtractState::Complete;
CodeByteSize = *OffsetPtr - Offset;
- Tag = static_cast<llvm::dwarf::Tag>(Data.getULEB128(OffsetPtr));
+ Tag = static_cast<llvm::dwarf::Tag>(Data.getULEB128(OffsetPtr, &Err));
+ if (Err)
+ return Err;
+
if (Tag == DW_TAG_null) {
clear();
return make_error<llvm::object::GenericBinaryError>(
"abbreviation declaration requires a non-null tag");
}
- uint8_t ChildrenByte = Data.getU8(OffsetPtr);
+ uint8_t ChildrenByte = Data.getU8(OffsetPtr, &Err);
+ if (Err)
+ return Err;
+
HasChildren = (ChildrenByte == DW_CHILDREN_yes);
// Assign a value to our optional FixedAttributeSize member variable. If
// this member variable still has a value after the while loop below, then
@@ -58,8 +68,13 @@ DWARFAbbreviationDeclaration::extract(DataExtractor Data, uint64_t *OffsetPtr) {
// Read all of the abbreviation attributes and forms.
while (Data.isValidOffset(*OffsetPtr)) {
- auto A = static_cast<Attribute>(Data.getULEB128(OffsetPtr));
- auto F = static_cast<Form>(Data.getULEB128(OffsetPtr));
+ auto A = static_cast<Attribute>(Data.getULEB128(OffsetPtr, &Err));
+ if (Err)
+ return Err;
+
+ auto F = static_cast<Form>(Data.getULEB128(OffsetPtr, &Err));
+ if (Err)
+ return Err;
// We successfully reached the end of this abbreviation declaration
// since both attribute and form are zero. There may be more abbreviation
diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugAbbrevTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugAbbrevTest.cpp
index ad61ccbc22c95..62627ae6956ff 100644
--- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugAbbrevTest.cpp
+++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugAbbrevTest.cpp
@@ -42,6 +42,16 @@ void writeAbbreviationDeclarations(raw_ostream &OS, uint32_t FirstCode,
encodeULEB128(0, OS);
}
+void writeMalformedULEB128Value(raw_ostream &OS) {
+ OS << static_cast<uint8_t>(0x80);
+}
+
+void writeULEB128LargerThan64Bits(raw_ostream &OS) {
+ static constexpr llvm::StringRef LargeULEB128 =
+ "\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x01";
+ OS << LargeULEB128;
+}
+
TEST(DWARFDebugAbbrevTest, DWARFAbbrevDeclSetExtractSuccess) {
SmallString<64> RawData;
raw_svector_ostream OS(RawData);
@@ -104,3 +114,168 @@ TEST(DWARFDebugAbbrevTest, DWARFAbbrevDeclSetExtractSuccessOutOfOrder) {
EXPECT_FALSE(Abbrev1->hasChildren());
EXPECT_EQ(Abbrev1->getNumAttributes(), 1u);
}
+
+TEST(DWARFDebugAbbrevTest, DWARFAbbreviationDeclSetCodeExtractionError) {
+ SmallString<64> RawData;
+
+ // Check for malformed ULEB128.
+ {
+ raw_svector_ostream OS(RawData);
+ writeMalformedULEB128Value(OS);
+
+ uint64_t Offset = 0;
+ DataExtractor Data(RawData, sys::IsLittleEndianHost, sizeof(uint64_t));
+ DWARFAbbreviationDeclarationSet AbbrevSet;
+ EXPECT_FALSE(AbbrevSet.extract(Data, &Offset));
+ EXPECT_EQ(Offset, 0u);
+ }
+
+ RawData.clear();
+ // Check for ULEB128 too large to fit into a uin64_t.
+ {
+ raw_svector_ostream OS(RawData);
+ writeULEB128LargerThan64Bits(OS);
+
+ uint64_t Offset = 0;
+ DataExtractor Data(RawData, sys::IsLittleEndianHost, sizeof(uint64_t));
+ DWARFAbbreviationDeclarationSet AbbrevSet;
+ EXPECT_FALSE(AbbrevSet.extract(Data, &Offset));
+ EXPECT_EQ(Offset, 0u);
+ }
+}
+
+TEST(DWARFDebugAbbrevTest, DWARFAbbreviationDeclSetTagExtractionError) {
+ SmallString<64> RawData;
+ const uint32_t Code = 1;
+
+ // Check for malformed ULEB128.
+ {
+ raw_svector_ostream OS(RawData);
+ encodeULEB128(Code, OS);
+ writeMalformedULEB128Value(OS);
+
+ uint64_t Offset = 0;
+ DataExtractor Data(RawData, sys::IsLittleEndianHost, sizeof(uint64_t));
+ DWARFAbbreviationDeclarationSet AbbrevSet;
+ EXPECT_TRUE(AbbrevSet.extract(Data, &Offset));
+ // Only the code was extracted correctly.
+ EXPECT_EQ(Offset, 1u);
+ }
+
+ RawData.clear();
+ // Check for ULEB128 too large to fit into a uint64_t.
+ {
+ raw_svector_ostream OS(RawData);
+ encodeULEB128(Code, OS);
+ writeULEB128LargerThan64Bits(OS);
+
+ uint64_t Offset = 0;
+ DataExtractor Data(RawData, sys::IsLittleEndianHost, sizeof(uint64_t));
+ DWARFAbbreviationDeclarationSet AbbrevSet;
+ EXPECT_TRUE(AbbrevSet.extract(Data, &Offset));
+ // Only the code was extracted correctly.
+ EXPECT_EQ(Offset, 1u);
+ }
+}
+
+TEST(DWARFDebugAbbrevTest, DWARFAbbreviatioDeclSetChildExtractionError) {
+ SmallString<64> RawData;
+ const uint32_t Code = 1;
+ const dwarf::Tag Tag = DW_TAG_compile_unit;
+
+ // We want to make sure that we fail if we reach the end of the stream before
+ // reading the 'children' byte.
+ raw_svector_ostream OS(RawData);
+ encodeULEB128(Code, OS);
+ encodeULEB128(Tag, OS);
+
+ uint64_t Offset = 0;
+ DataExtractor Data(RawData, sys::IsLittleEndianHost, sizeof(uint64_t));
+ DWARFAbbreviationDeclarationSet AbbrevSet;
+ EXPECT_TRUE(AbbrevSet.extract(Data, &Offset));
+ // The code and the tag were extracted correctly.
+ EXPECT_EQ(Offset, 2u);
+}
+
+TEST(DWARFDebugAbbrevTest, DWARFAbbreviationDeclSetAttributeExtractionError) {
+ SmallString<64> RawData;
+ const uint32_t Code = 1;
+ const dwarf::Tag Tag = DW_TAG_compile_unit;
+ const uint8_t Children = DW_CHILDREN_yes;
+
+ // Check for malformed ULEB128.
+ {
+ raw_svector_ostream OS(RawData);
+ encodeULEB128(Code, OS);
+ encodeULEB128(Tag, OS);
+ OS << Children;
+ writeMalformedULEB128Value(OS);
+
+ uint64_t Offset = 0;
+ DataExtractor Data(RawData, sys::IsLittleEndianHost, sizeof(uint64_t));
+ DWARFAbbreviationDeclarationSet AbbrevSet;
+ EXPECT_TRUE(AbbrevSet.extract(Data, &Offset));
+ // The code, tag, and child byte were extracted correctly.
+ EXPECT_EQ(Offset, 3u);
+ }
+
+ RawData.clear();
+ // Check for ULEB128 too large to fit into a uint64_t.
+ {
+ raw_svector_ostream OS(RawData);
+ encodeULEB128(Code, OS);
+ encodeULEB128(Tag, OS);
+ OS << Children;
+ writeULEB128LargerThan64Bits(OS);
+
+ uint64_t Offset = 0;
+ DataExtractor Data(RawData, sys::IsLittleEndianHost, sizeof(uint64_t));
+ DWARFAbbreviationDeclarationSet AbbrevSet;
+ EXPECT_TRUE(AbbrevSet.extract(Data, &Offset));
+ // The code, tag, and child byte were extracted correctly.
+ EXPECT_EQ(Offset, 3u);
+ }
+}
+
+TEST(DWARFDebugAbbrevTest, DWARFAbbreviationDeclSetFormExtractionError) {
+ SmallString<64> RawData;
+ const uint32_t Code = 1;
+ const dwarf::Tag Tag = DW_TAG_compile_unit;
+ const uint8_t Children = DW_CHILDREN_yes;
+ const dwarf::Attribute Attr = DW_AT_name;
+
+ // Check for malformed ULEB128.
+ {
+ raw_svector_ostream OS(RawData);
+ encodeULEB128(Code, OS);
+ encodeULEB128(Tag, OS);
+ OS << Children;
+ encodeULEB128(Attr, OS);
+ writeMalformedULEB128Value(OS);
+
+ uint64_t Offset = 0;
+ DataExtractor Data(RawData, sys::IsLittleEndianHost, sizeof(uint64_t));
+ DWARFAbbreviationDeclarationSet AbbrevSet;
+ EXPECT_TRUE(AbbrevSet.extract(Data, &Offset));
+ // The code, tag, child byte, and first attribute were extracted correctly.
+ EXPECT_EQ(Offset, 4u);
+ }
+
+ RawData.clear();
+ // Check for ULEB128 too large to fit into a uint64_t.
+ {
+ raw_svector_ostream OS(RawData);
+ encodeULEB128(Code, OS);
+ encodeULEB128(Tag, OS);
+ OS << Children;
+ encodeULEB128(Attr, OS);
+ writeULEB128LargerThan64Bits(OS);
+
+ uint64_t Offset = 0;
+ DataExtractor Data(RawData, sys::IsLittleEndianHost, sizeof(uint64_t));
+ DWARFAbbreviationDeclarationSet AbbrevSet;
+ EXPECT_TRUE(AbbrevSet.extract(Data, &Offset));
+ // The code, tag, child byte, and first attribute were extracted correctly.
+ EXPECT_EQ(Offset, 4u);
+ }
+}
More information about the llvm-commits
mailing list