[llvm] c19a289 - llvm-dwarfdump: Print warnings on invalid DWARF
Jan Kratochvil via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 27 02:41:29 PDT 2021
Author: Jan Kratochvil
Date: 2021-06-27T11:38:35+02:00
New Revision: c19a28919fc969a68401079532fafa6b1e44b6f4
URL: https://github.com/llvm/llvm-project/commit/c19a28919fc969a68401079532fafa6b1e44b6f4
DIFF: https://github.com/llvm/llvm-project/commit/c19a28919fc969a68401079532fafa6b1e44b6f4.diff
LOG: llvm-dwarfdump: Print warnings on invalid DWARF
llvm-dwarfdump was silent even when the format of DWARF was invalid
and/or llvm-dwarfdump did not understand/support some of the constructs.
This can be pretty confusing as llvm-dwarfdump is a tool for DWARF
producers+consumers development.
Review comments also by @dblaikie.
Reviewed By: jhenderson
Differential Revision: https://reviews.llvm.org/D104271
Added:
llvm/test/tools/llvm-dwarfdump/X86/debug-entry-invalid.s
Modified:
llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
index 7d88e1447dca7..75b2280658f16 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
@@ -364,12 +364,17 @@ class DWARFContext : public DIContext {
getLocalsForAddress(object::SectionedAddress Address) override;
bool isLittleEndian() const { return DObj->isLittleEndian(); }
+ static unsigned getMaxSupportedVersion() { return 5; }
static bool isSupportedVersion(unsigned version) {
- return version == 2 || version == 3 || version == 4 || version == 5;
+ return version >= 2 && version <= getMaxSupportedVersion();
}
+ static SmallVector<uint8_t, 3> getSupportedAddressSizes() {
+ return {2, 4, 8};
+ }
static bool isAddressSizeSupported(unsigned AddressSize) {
- return AddressSize == 2 || AddressSize == 4 || AddressSize == 8;
+ return llvm::any_of(getSupportedAddressSizes(),
+ [=](auto Elem) { return Elem == AddressSize; });
}
std::shared_ptr<DWARFContext> getDWOContext(StringRef AbsolutePath);
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
index f07a4d8d27839..882aa69d9ccbe 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
@@ -47,6 +47,8 @@ class DWARFAbbreviationDeclarationSet {
return Decls.end();
}
+ std::string getCodeRange() const;
+
private:
void clear();
};
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index a0095acd9e357..a340fb3f02851 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -357,6 +357,8 @@ class DWARFUnit {
return StringOffsetsTableContribution->Base;
}
+ uint64_t getAbbreviationsOffset() const { return Header.getAbbrOffset(); }
+
const DWARFAbbreviationDeclarationSet *getAbbreviations() const;
static bool isMatchingUnitTypeAndTag(uint8_t UnitType, dwarf::Tag Tag) {
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
index 4afac2f995030..d91a630256d6f 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
@@ -67,6 +67,35 @@ DWARFAbbreviationDeclarationSet::getAbbreviationDeclaration(
return &Decls[AbbrCode - FirstAbbrCode];
}
+std::string DWARFAbbreviationDeclarationSet::getCodeRange() const {
+ // Collect every abbrev code in the order Decls is iterated (no sort is done).
+ std::vector<uint32_t> Codes;
+ Codes.reserve(Decls.size());
+ for (const auto &Decl : Decls)
+ Codes.push_back(Decl.getCode());
+
+ std::string Buffer = "";
+ raw_string_ostream Stream(Buffer);
+ // Each iteration through this loop represents a single contiguous range in
+ // the set of codes.
+ for (auto Current = Codes.begin(), End = Codes.end(); Current != End;) {
+ uint32_t RangeStart = *Current;
+ // Add the current range start.
+ Stream << *Current;
+ uint32_t RangeEnd = RangeStart;
+ // Find the end of the current range.
+ while (++Current != End && *Current == RangeEnd + 1)
+ ++RangeEnd;
+ // If there is more than one value in the range, add the range end too.
+ if (RangeStart != RangeEnd)
+ Stream << "-" << RangeEnd;
+ // If there is at least one more range, add a separator.
+ if (Current != End)
+ Stream << ", ";
+ }
+ return Buffer;
+}
+
DWARFDebugAbbrev::DWARFDebugAbbrev() { clear(); }
void DWARFDebugAbbrev::clear() {
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
index 2b7d0c3363a1d..7ebb0092c34a2 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
@@ -8,6 +8,7 @@
#include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
@@ -30,17 +31,42 @@ bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, uint64_t *OffsetPtr,
uint64_t UEndOffset, uint32_t D) {
Offset = *OffsetPtr;
Depth = D;
- if (Offset >= UEndOffset || !DebugInfoData.isValidOffset(Offset))
+ if (Offset >= UEndOffset) {
+ U.getContext().getWarningHandler()(
+ createStringError(errc::invalid_argument,
+ "DWARF unit from offset 0x%8.8" PRIx64 " incl. "
+ "to offset 0x%8.8" PRIx64 " excl. "
+ "tries to read DIEs at offset 0x%8.8" PRIx64,
+ U.getOffset(), U.getNextUnitOffset(), *OffsetPtr));
return false;
+ }
+ assert(DebugInfoData.isValidOffset(UEndOffset - 1));
uint64_t AbbrCode = DebugInfoData.getULEB128(OffsetPtr);
if (0 == AbbrCode) {
// NULL debug tag entry.
AbbrevDecl = nullptr;
return true;
}
- if (const auto *AbbrevSet = U.getAbbreviations())
- AbbrevDecl = AbbrevSet->getAbbreviationDeclaration(AbbrCode);
- if (nullptr == AbbrevDecl) {
+ const auto *AbbrevSet = U.getAbbreviations();
+ if (!AbbrevSet) {
+ U.getContext().getWarningHandler()(
+ createStringError(errc::invalid_argument,
+ "DWARF unit at offset 0x%8.8" PRIx64 " "
+ "contains invalid abbreviation set offset 0x%" PRIx64,
+ U.getOffset(), U.getAbbreviationsOffset()));
+ // Restore the original offset.
+ *OffsetPtr = Offset;
+ return false;
+ }
+ AbbrevDecl = AbbrevSet->getAbbreviationDeclaration(AbbrCode);
+ if (!AbbrevDecl) {
+ U.getContext().getWarningHandler()(
+ createStringError(errc::invalid_argument,
+ "DWARF unit at offset 0x%8.8" PRIx64 " "
+ "contains invalid abbreviation %" PRIu64 " at "
+ "offset 0x%8.8" PRIx64 ", valid abbreviations are %s",
+ U.getOffset(), AbbrCode, *OffsetPtr,
+ AbbrevSet->getCodeRange().c_str()));
// Restore the original offset.
*OffsetPtr = Offset;
return false;
@@ -62,6 +88,11 @@ bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, uint64_t *OffsetPtr,
OffsetPtr, U.getFormParams())) {
// We failed to skip this attribute's value, restore the original offset
// and return the failure status.
+ U.getContext().getWarningHandler()(createStringError(
+ errc::invalid_argument,
+ "DWARF unit at offset 0x%8.8" PRIx64 " "
+ "contains invalid FORM_* 0x%" PRIx16 " at offset 0x%8.8" PRIx64,
+ U.getOffset(), AttrSpec.Form, *OffsetPtr));
*OffsetPtr = Offset;
return false;
}
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index dad7231daafc8..57ac0a51698d6 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -259,26 +259,73 @@ bool DWARFUnitHeader::extract(DWARFContext &Context,
} else if (UnitType == DW_UT_split_compile || UnitType == DW_UT_skeleton)
DWOId = debug_info.getU64(offset_ptr, &Err);
- if (errorToBool(std::move(Err)))
+ if (Err) {
+ Context.getWarningHandler()(joinErrors(
+ createStringError(
+ errc::invalid_argument,
+ "DWARF unit at 0x%8.8" PRIx64 " cannot be parsed:", Offset),
+ std::move(Err)));
return false;
+ }
// Header fields all parsed, capture the size of this unit header.
assert(*offset_ptr - Offset <= 255 && "unexpected header size");
Size = uint8_t(*offset_ptr - Offset);
+ uint64_t NextCUOffset = Offset + getUnitLengthFieldByteSize() + getLength();
+
+ if (!debug_info.isValidOffset(getNextUnitOffset() - 1)) {
+ Context.getWarningHandler()(
+ createStringError(errc::invalid_argument,
+ "DWARF unit from offset 0x%8.8" PRIx64 " incl. "
+ "to offset 0x%8.8" PRIx64 " excl. "
+ "extends past section size 0x%8.8zx",
+ Offset, NextCUOffset, debug_info.size()));
+ return false;
+ }
+
+ if (!DWARFContext::isSupportedVersion(getVersion())) {
+ Context.getWarningHandler()(createStringError(
+ errc::invalid_argument,
+ "DWARF unit at offset 0x%8.8" PRIx64 " "
+ "has unsupported version %" PRIu16 ", supported are 2-%u",
+ Offset, getVersion(), DWARFContext::getMaxSupportedVersion()));
+ return false;
+ }
// Type offset is unit-relative; should be after the header and before
// the end of the current unit.
- bool TypeOffsetOK =
- !isTypeUnit()
- ? true
- : TypeOffset >= Size &&
- TypeOffset < getLength() + getUnitLengthFieldByteSize();
- bool LengthOK = debug_info.isValidOffset(getNextUnitOffset() - 1);
- bool VersionOK = DWARFContext::isSupportedVersion(getVersion());
- bool AddrSizeOK = DWARFContext::isAddressSizeSupported(getAddressByteSize());
-
- if (!LengthOK || !VersionOK || !AddrSizeOK || !TypeOffsetOK)
+ if (isTypeUnit() && TypeOffset < Size) {
+ Context.getWarningHandler()(
+ createStringError(errc::invalid_argument,
+ "DWARF type unit at offset "
+ "0x%8.8" PRIx64 " "
+ "has its relocated type_offset 0x%8.8" PRIx64 " "
+ "pointing inside the header",
+ Offset, Offset + TypeOffset));
+ return false;
+ }
+ if (isTypeUnit() &&
+ TypeOffset >= getUnitLengthFieldByteSize() + getLength()) {
+ Context.getWarningHandler()(createStringError(
+ errc::invalid_argument,
+ "DWARF type unit from offset 0x%8.8" PRIx64 " incl. "
+ "to offset 0x%8.8" PRIx64 " excl. has its "
+ "relocated type_offset 0x%8.8" PRIx64 " pointing past the unit end",
+ Offset, NextCUOffset, Offset + TypeOffset));
+ return false;
+ }
+
+ if (!DWARFContext::isAddressSizeSupported(getAddressByteSize())) {
+ SmallVector<std::string, 3> Sizes;
+ for (auto Size : DWARFContext::getSupportedAddressSizes())
+ Sizes.push_back(std::to_string(Size));
+ Context.getWarningHandler()(createStringError(
+ errc::invalid_argument,
+ "DWARF unit at offset 0x%8.8" PRIx64 " "
+ "has unsupported address size %" PRIu8 ", supported are %s",
+ Offset, getAddressByteSize(), llvm::join(Sizes, ", ").c_str()));
return false;
+ }
// Keep track of the highest DWARF version we encounter across all units.
Context.setMaxVersionIfGreater(getVersion());
@@ -361,6 +408,8 @@ void DWARFUnit::extractDIEsToVector(
uint64_t NextCUOffset = getNextUnitOffset();
DWARFDebugInfoEntry DIE;
DWARFDataExtractor DebugInfoData = getDebugInfoExtractor();
+ // The end offset has been already checked by DWARFUnitHeader::extract.
+ assert(DebugInfoData.isValidOffset(NextCUOffset - 1));
uint32_t Depth = 0;
bool IsCUDie = true;
@@ -385,6 +434,8 @@ void DWARFUnit::extractDIEsToVector(
// Normal DIE
if (AbbrDecl->hasChildren())
++Depth;
+ else if (Depth == 0)
+ break; // This unit has a single DIE with no children.
} else {
// NULL DIE.
if (Depth > 0)
@@ -393,17 +444,6 @@ void DWARFUnit::extractDIEsToVector(
break; // We are done with this compile unit!
}
}
-
- // Give a little bit of info if we encounter corrupt DWARF (our offset
- // should always terminate at or before the start of the next compilation
- // unit header).
- if (DIEOffset > NextCUOffset)
- Context.getWarningHandler()(
- createStringError(errc::invalid_argument,
- "DWARF compile unit extends beyond its "
- "bounds cu 0x%8.8" PRIx64 " "
- "at 0x%8.8" PRIx64 "\n",
- getOffset(), DIEOffset));
}
void DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
@@ -794,7 +834,7 @@ DWARFDie DWARFUnit::getLastChild(const DWARFDebugInfoEntry *Die) {
const DWARFAbbreviationDeclarationSet *DWARFUnit::getAbbreviations() const {
if (!Abbrevs)
- Abbrevs = Abbrev->getAbbreviationDeclarationSet(Header.getAbbrOffset());
+ Abbrevs = Abbrev->getAbbreviationDeclarationSet(getAbbreviationsOffset());
return Abbrevs;
}
diff --git a/llvm/test/tools/llvm-dwarfdump/X86/debug-entry-invalid.s b/llvm/test/tools/llvm-dwarfdump/X86/debug-entry-invalid.s
new file mode 100644
index 0000000000000..712a3f30f60fc
--- /dev/null
+++ b/llvm/test/tools/llvm-dwarfdump/X86/debug-entry-invalid.s
@@ -0,0 +1,111 @@
+## Test llvm-dwarfdump detects and reports invalid DWARF format of the file.
+
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj --defsym=CUEND=1 \
+# RUN: | llvm-dwarfdump - 2>&1 | FileCheck --check-prefix=CUEND %s
+# CUEND: warning: DWARF unit from offset 0x0000000c incl. to offset 0x0000002b excl. tries to read DIEs at offset 0x0000002b
+
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj --defsym=ABBREVSETINVALID=1 \
+# RUN: | llvm-dwarfdump - 2>&1 | FileCheck --check-prefix=ABBREVSETINVALID %s
+# ABBREVSETINVALID: warning: DWARF unit at offset 0x0000000c contains invalid abbreviation set offset 0x0
+
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj --defsym=ABBREVNO=2 \
+# RUN: | llvm-dwarfdump - 2>&1 | FileCheck --check-prefix=ABBREVNO %s
+# ABBREVNO: warning: DWARF unit at offset 0x0000000c contains invalid abbreviation 2 at offset 0x00000018, valid abbreviations are 1, 5, 3-4
+
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj --defsym=FORMNO=0xdead \
+# RUN: | llvm-dwarfdump - 2>&1 | FileCheck --check-prefix=FORMNO %s
+# FORMNO: warning: DWARF unit at offset 0x0000000c contains invalid FORM_* 0xdead at offset 0x00000018
+
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj --defsym=SHORTINITLEN=1 \
+# RUN: | llvm-dwarfdump - 2>&1 | FileCheck --check-prefix=SHORTINITLEN %s
+# SHORTINITLEN: warning: DWARF unit at 0x0000002c cannot be parsed:
+# SHORTINITLEN-NEXT: warning: unexpected end of data at offset 0x2d while reading [0x2c, 0x30)
+
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj --defsym=BADTYPEUNIT=1 \
+# RUN: | llvm-dwarfdump - 2>&1 | FileCheck --check-prefix=BADTYPEUNITBEFORE %s
+# BADTYPEUNITBEFORE: warning: DWARF type unit at offset 0x0000002c has its relocated type_offset 0x0000002d pointing inside the header
+
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj --defsym=BADTYPEUNIT=0x100 \
+# RUN: | llvm-dwarfdump - 2>&1 | FileCheck --check-prefix=BADTYPEUNITAFTER %s
+# BADTYPEUNITAFTER: warning: DWARF type unit from offset 0x0000002c incl. to offset 0x00000045 excl. has its relocated type_offset 0x0000012c pointing past the unit end
+
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj --defsym=TOOLONG=1 \
+# RUN: | llvm-dwarfdump - 2>&1 | FileCheck --check-prefix=TOOLONG %s
+# TOOLONG: warning: DWARF unit from offset 0x0000000c incl. to offset 0x0000002d excl. extends past section size 0x0000002c
+
+ .section .debug_abbrev,"",@progbits
+.ifndef ABBREVSETINVALID
+ .uleb128 1 # Abbreviation Code
+ .uleb128 17 # DW_TAG_compile_unit
+ .uleb128 1 # DW_CHILDREN_yes
+ .uleb128 37 # DW_AT_producer
+.ifndef FORMNO
+ .uleb128 8 # DW_FORM_string
+.else
+ .uleb128 FORMNO
+.endif
+ .uleb128 0 # end abbrev 1 DW_AT_*
+ .uleb128 0 # end abbrev 1 DW_FORM_*
+ .uleb128 5 # Abbreviation Code
+ .uleb128 10 # DW_TAG_label
+ .uleb128 0 # DW_CHILDREN_no
+ .uleb128 0 # end abbrev 5 DW_AT_*
+ .uleb128 0 # end abbrev 5 DW_FORM_*
+ .uleb128 3 # Abbreviation Code
+ .uleb128 10 # DW_TAG_label
+ .uleb128 0 # DW_CHILDREN_no
+ .uleb128 0 # end abbrev 3 DW_AT_*
+ .uleb128 0 # end abbrev 3 DW_FORM_*
+ .uleb128 4 # Abbreviation Code
+ .uleb128 10 # DW_TAG_label
+ .uleb128 0 # DW_CHILDREN_no
+ .uleb128 0 # end abbrev 4 DW_AT_*
+ .uleb128 0 # end abbrev 4 DW_FORM_*
+ .uleb128 0 # end abbrevs section
+.endif
+
+ .section .debug_info,"",@progbits
+## The first CU is here to shift the next CU being really tested to non-zero CU
+## offset to check more for error messages.
+ .long .Lcu_endp-.Lcu_startp # Length of Unit
+.Lcu_startp:
+ .short 4 # DWARF version number
+ .long .debug_abbrev # Offset Into Abbrev. Section
+ .byte 8 # Address Size (in bytes)
+ .uleb128 0 # End Of Children Mark
+.Lcu_endp:
+
+.ifndef TOOLONG
+.equ TOOLONG, 0
+.endif
+ .long .Lcu_end0-.Lcu_start0 + TOOLONG # Length of Unit
+.Lcu_start0:
+ .short 4 # DWARF version number
+ .long .debug_abbrev # Offset Into Abbrev. Section
+ .byte 8 # Address Size (in bytes)
+.ifndef ABBREVNO
+ .uleb128 1 # Abbrev [1] DW_TAG_compile_unit
+.else
+ .uleb128 ABBREVNO
+.endif
+ .asciz "hand-written DWARF" # DW_AT_producer
+.ifndef CUEND
+ .uleb128 0 # End Of Children Mark
+.endif
+.Lcu_end0:
+
+.ifdef SHORTINITLEN
+ .byte 0x55 # Too short Length of Unit
+.endif
+.ifdef BADTYPEUNIT
+ .long .Lcu_end1-.Lcu_start1 # Length of Unit
+.Lcu_start1:
+ .short 5 # DWARF version number
+ .byte 2 # DW_UT_type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev # Offset Into Abbrev. Section
+ .quad 0xbaddefacedfacade # Type Signature
+ .long BADTYPEUNIT # Type DIE Offset
+ .uleb128 0 # End Of Children Mark
+.Lcu_end1:
+.endif
More information about the llvm-commits
mailing list