[llvm] c19a289 - llvm-dwarfdump: Print warnings on invalid DWARF

Jan Kratochvil via llvm-commits llvm-commits at lists.llvm.org
Sun Jun 27 02:41:29 PDT 2021


Author: Jan Kratochvil
Date: 2021-06-27T11:38:35+02:00
New Revision: c19a28919fc969a68401079532fafa6b1e44b6f4

URL: https://github.com/llvm/llvm-project/commit/c19a28919fc969a68401079532fafa6b1e44b6f4
DIFF: https://github.com/llvm/llvm-project/commit/c19a28919fc969a68401079532fafa6b1e44b6f4.diff

LOG: llvm-dwarfdump: Print warnings on invalid DWARF

Until now llvm-dwarfdump stayed silent even when the DWARF it was reading was
malformed, or used constructs that llvm-dwarfdump does not understand or
support. That is confusing, because llvm-dwarfdump is a tool for developing
DWARF producers and consumers.

Review comments also by @dblaikie.

Reviewed By: jhenderson

Differential Revision: https://reviews.llvm.org/D104271

Added: 
    llvm/test/tools/llvm-dwarfdump/X86/debug-entry-invalid.s

Modified: 
    llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
    llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
    llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
    llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
    llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
    llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
index 7d88e1447dca7..75b2280658f16 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
@@ -364,12 +364,17 @@ class DWARFContext : public DIContext {
   getLocalsForAddress(object::SectionedAddress Address) override;
 
   bool isLittleEndian() const { return DObj->isLittleEndian(); }
+  static unsigned getMaxSupportedVersion() { return 5; }
   static bool isSupportedVersion(unsigned version) {
-    return version == 2 || version == 3 || version == 4 || version == 5;
+    return version >= 2 && version <= getMaxSupportedVersion();
   }
 
+  static SmallVector<uint8_t, 3> getSupportedAddressSizes() {
+    return {2, 4, 8};
+  }
   static bool isAddressSizeSupported(unsigned AddressSize) {
-    return AddressSize == 2 || AddressSize == 4 || AddressSize == 8;
+    return llvm::any_of(getSupportedAddressSizes(),
+                        [=](auto Elem) { return Elem == AddressSize; });
   }
 
   std::shared_ptr<DWARFContext> getDWOContext(StringRef AbsolutePath);

diff  --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
index f07a4d8d27839..882aa69d9ccbe 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
@@ -47,6 +47,8 @@ class DWARFAbbreviationDeclarationSet {
     return Decls.end();
   }
 
+  std::string getCodeRange() const;
+
 private:
   void clear();
 };

diff  --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index a0095acd9e357..a340fb3f02851 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -357,6 +357,8 @@ class DWARFUnit {
     return StringOffsetsTableContribution->Base;
   }
 
+  uint64_t getAbbreviationsOffset() const { return Header.getAbbrOffset(); }
+
   const DWARFAbbreviationDeclarationSet *getAbbreviations() const;
 
   static bool isMatchingUnitTypeAndTag(uint8_t UnitType, dwarf::Tag Tag) {

diff  --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
index 4afac2f995030..d91a630256d6f 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
@@ -67,6 +67,35 @@ DWARFAbbreviationDeclarationSet::getAbbreviationDeclaration(
   return &Decls[AbbrCode - FirstAbbrCode];
 }
 
+std::string DWARFAbbreviationDeclarationSet::getCodeRange() const {
+  // Gather every abbrev code in declaration order; the list is not sorted.
+  std::vector<uint32_t> Codes;
+  Codes.reserve(Decls.size());
+  for (const auto &Decl : Decls)
+    Codes.push_back(Decl.getCode());
+
+  std::string Buffer;
+  raw_string_ostream Stream(Buffer);
+  // Each iteration through this loop prints one run of consecutive codes, so
+  // out-of-order declarations produce several runs (e.g. "1, 5, 3-4").
+  for (auto Current = Codes.begin(), End = Codes.end(); Current != End;) {
+    uint32_t RangeStart = *Current;
+    // Add the current range start.
+    Stream << *Current;
+    uint32_t RangeEnd = RangeStart;
+    // Find the end of the current range.
+    while (++Current != End && *Current == RangeEnd + 1)
+      ++RangeEnd;
+    // If there is more than one value in the range, add the range end too.
+    if (RangeStart != RangeEnd)
+      Stream << "-" << RangeEnd;
+    // If there is at least one more range, add a separator.
+    if (Current != End)
+      Stream << ", ";
+  }
+  return Stream.str(); // str() ensures all output has reached Buffer.
+}
+
 DWARFDebugAbbrev::DWARFDebugAbbrev() { clear(); }
 
 void DWARFDebugAbbrev::clear() {

diff  --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
index 2b7d0c3363a1d..7ebb0092c34a2 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
@@ -8,6 +8,7 @@
 
 #include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
 #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
@@ -30,17 +31,42 @@ bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, uint64_t *OffsetPtr,
                                       uint64_t UEndOffset, uint32_t D) {
   Offset = *OffsetPtr;
   Depth = D;
-  if (Offset >= UEndOffset || !DebugInfoData.isValidOffset(Offset))
+  if (Offset >= UEndOffset) {
+    U.getContext().getWarningHandler()(
+        createStringError(errc::invalid_argument,
+                          "DWARF unit from offset 0x%8.8" PRIx64 " incl. "
+                          "to offset 0x%8.8" PRIx64 " excl. "
+                          "tries to read DIEs at offset 0x%8.8" PRIx64,
+                          U.getOffset(), U.getNextUnitOffset(), *OffsetPtr));
     return false;
+  }
+  assert(DebugInfoData.isValidOffset(UEndOffset - 1));
   uint64_t AbbrCode = DebugInfoData.getULEB128(OffsetPtr);
   if (0 == AbbrCode) {
     // NULL debug tag entry.
     AbbrevDecl = nullptr;
     return true;
   }
-  if (const auto *AbbrevSet = U.getAbbreviations())
-    AbbrevDecl = AbbrevSet->getAbbreviationDeclaration(AbbrCode);
-  if (nullptr == AbbrevDecl) {
+  const auto *AbbrevSet = U.getAbbreviations();
+  if (!AbbrevSet) {
+    U.getContext().getWarningHandler()(
+        createStringError(errc::invalid_argument,
+                          "DWARF unit at offset 0x%8.8" PRIx64 " "
+                          "contains invalid abbreviation set offset 0x%" PRIx64,
+                          U.getOffset(), U.getAbbreviationsOffset()));
+    // Restore the original offset.
+    *OffsetPtr = Offset;
+    return false;
+  }
+  AbbrevDecl = AbbrevSet->getAbbreviationDeclaration(AbbrCode);
+  if (!AbbrevDecl) {
+    U.getContext().getWarningHandler()(
+        createStringError(errc::invalid_argument,
+                          "DWARF unit at offset 0x%8.8" PRIx64 " "
+                          "contains invalid abbreviation %" PRIu64 " at "
+                          "offset 0x%8.8" PRIx64 ", valid abbreviations are %s",
+                          U.getOffset(), AbbrCode, *OffsetPtr,
+                          AbbrevSet->getCodeRange().c_str()));
     // Restore the original offset.
     *OffsetPtr = Offset;
     return false;
@@ -62,6 +88,11 @@ bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, uint64_t *OffsetPtr,
                                           OffsetPtr, U.getFormParams())) {
       // We failed to skip this attribute's value, restore the original offset
       // and return the failure status.
+      U.getContext().getWarningHandler()(createStringError(
+          errc::invalid_argument,
+          "DWARF unit at offset 0x%8.8" PRIx64 " "
+          "contains invalid FORM_* 0x%" PRIx16 " at offset 0x%8.8" PRIx64,
+          U.getOffset(), AttrSpec.Form, *OffsetPtr));
       *OffsetPtr = Offset;
       return false;
     }

diff  --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index dad7231daafc8..57ac0a51698d6 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -259,26 +259,73 @@ bool DWARFUnitHeader::extract(DWARFContext &Context,
   } else if (UnitType == DW_UT_split_compile || UnitType == DW_UT_skeleton)
     DWOId = debug_info.getU64(offset_ptr, &Err);
 
-  if (errorToBool(std::move(Err)))
+  if (Err) {
+    Context.getWarningHandler()(joinErrors(
+        createStringError(
+            errc::invalid_argument,
+            "DWARF unit at 0x%8.8" PRIx64 " cannot be parsed:", Offset),
+        std::move(Err)));
     return false;
+  }
 
   // Header fields all parsed, capture the size of this unit header.
   assert(*offset_ptr - Offset <= 255 && "unexpected header size");
   Size = uint8_t(*offset_ptr - Offset);
+  uint64_t NextCUOffset = Offset + getUnitLengthFieldByteSize() + getLength();
+
+  if (!debug_info.isValidOffset(getNextUnitOffset() - 1)) {
+    Context.getWarningHandler()(
+        createStringError(errc::invalid_argument,
+                          "DWARF unit from offset 0x%8.8" PRIx64 " incl. "
+                          "to offset  0x%8.8" PRIx64 " excl. "
+                          "extends past section size 0x%8.8zx",
+                          Offset, NextCUOffset, debug_info.size()));
+    return false;
+  }
+
+  if (!DWARFContext::isSupportedVersion(getVersion())) {
+    Context.getWarningHandler()(createStringError(
+        errc::invalid_argument,
+        "DWARF unit at offset 0x%8.8" PRIx64 " "
+        "has unsupported version %" PRIu16 ", supported are 2-%u",
+        Offset, getVersion(), DWARFContext::getMaxSupportedVersion()));
+    return false;
+  }
 
   // Type offset is unit-relative; should be after the header and before
   // the end of the current unit.
-  bool TypeOffsetOK =
-      !isTypeUnit()
-          ? true
-          : TypeOffset >= Size &&
-                TypeOffset < getLength() + getUnitLengthFieldByteSize();
-  bool LengthOK = debug_info.isValidOffset(getNextUnitOffset() - 1);
-  bool VersionOK = DWARFContext::isSupportedVersion(getVersion());
-  bool AddrSizeOK = DWARFContext::isAddressSizeSupported(getAddressByteSize());
-
-  if (!LengthOK || !VersionOK || !AddrSizeOK || !TypeOffsetOK)
+  if (isTypeUnit() && TypeOffset < Size) {
+    Context.getWarningHandler()(
+        createStringError(errc::invalid_argument,
+                          "DWARF type unit at offset "
+                          "0x%8.8" PRIx64 " "
+                          "has its relocated type_offset 0x%8.8" PRIx64 " "
+                          "pointing inside the header",
+                          Offset, Offset + TypeOffset));
+    return false;
+  }
+  if (isTypeUnit() &&
+      TypeOffset >= getUnitLengthFieldByteSize() + getLength()) {
+    Context.getWarningHandler()(createStringError(
+        errc::invalid_argument,
+        "DWARF type unit from offset 0x%8.8" PRIx64 " incl. "
+        "to offset 0x%8.8" PRIx64 " excl. has its "
+        "relocated type_offset 0x%8.8" PRIx64 " pointing past the unit end",
+        Offset, NextCUOffset, Offset + TypeOffset));
+    return false;
+  }
+
+  if (!DWARFContext::isAddressSizeSupported(getAddressByteSize())) {
+    SmallVector<std::string, 3> Sizes;
+    for (auto Size : DWARFContext::getSupportedAddressSizes())
+      Sizes.push_back(std::to_string(Size));
+    Context.getWarningHandler()(createStringError(
+        errc::invalid_argument,
+        "DWARF unit at offset 0x%8.8" PRIx64 " "
+        "has unsupported address size %" PRIu8 ", supported are %s",
+        Offset, getAddressByteSize(), llvm::join(Sizes, ", ").c_str()));
     return false;
+  }
 
   // Keep track of the highest DWARF version we encounter across all units.
   Context.setMaxVersionIfGreater(getVersion());
@@ -361,6 +408,8 @@ void DWARFUnit::extractDIEsToVector(
   uint64_t NextCUOffset = getNextUnitOffset();
   DWARFDebugInfoEntry DIE;
   DWARFDataExtractor DebugInfoData = getDebugInfoExtractor();
+  // The end offset has been already checked by DWARFUnitHeader::extract.
+  assert(DebugInfoData.isValidOffset(NextCUOffset - 1));
   uint32_t Depth = 0;
   bool IsCUDie = true;
 
@@ -385,6 +434,8 @@ void DWARFUnit::extractDIEsToVector(
       // Normal DIE
       if (AbbrDecl->hasChildren())
         ++Depth;
+      else if (Depth == 0)
+        break; // This unit has a single DIE with no children.
     } else {
       // NULL DIE.
       if (Depth > 0)
@@ -393,17 +444,6 @@ void DWARFUnit::extractDIEsToVector(
         break;  // We are done with this compile unit!
     }
   }
-
-  // Give a little bit of info if we encounter corrupt DWARF (our offset
-  // should always terminate at or before the start of the next compilation
-  // unit header).
-  if (DIEOffset > NextCUOffset)
-    Context.getWarningHandler()(
-        createStringError(errc::invalid_argument,
-                          "DWARF compile unit extends beyond its "
-                          "bounds cu 0x%8.8" PRIx64 " "
-                          "at 0x%8.8" PRIx64 "\n",
-                          getOffset(), DIEOffset));
 }
 
 void DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
@@ -794,7 +834,7 @@ DWARFDie DWARFUnit::getLastChild(const DWARFDebugInfoEntry *Die) {
 
 const DWARFAbbreviationDeclarationSet *DWARFUnit::getAbbreviations() const {
   if (!Abbrevs)
-    Abbrevs = Abbrev->getAbbreviationDeclarationSet(Header.getAbbrOffset());
+    Abbrevs = Abbrev->getAbbreviationDeclarationSet(getAbbreviationsOffset());
   return Abbrevs;
 }
 

diff  --git a/llvm/test/tools/llvm-dwarfdump/X86/debug-entry-invalid.s b/llvm/test/tools/llvm-dwarfdump/X86/debug-entry-invalid.s
new file mode 100644
index 0000000000000..712a3f30f60fc
--- /dev/null
+++ b/llvm/test/tools/llvm-dwarfdump/X86/debug-entry-invalid.s
@@ -0,0 +1,111 @@
+## Test that llvm-dwarfdump detects and reports invalid DWARF in the input file.
+
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj --defsym=CUEND=1 \
+# RUN:   | llvm-dwarfdump - 2>&1 | FileCheck --check-prefix=CUEND %s
+# CUEND: warning: DWARF unit from offset 0x0000000c incl. to offset 0x0000002b excl. tries to read DIEs at offset 0x0000002b
+
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj --defsym=ABBREVSETINVALID=1 \
+# RUN:   | llvm-dwarfdump - 2>&1 | FileCheck --check-prefix=ABBREVSETINVALID %s
+# ABBREVSETINVALID: warning: DWARF unit at offset 0x0000000c contains invalid abbreviation set offset 0x0
+
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj --defsym=ABBREVNO=2 \
+# RUN:   | llvm-dwarfdump - 2>&1 | FileCheck --check-prefix=ABBREVNO %s
+# ABBREVNO: warning: DWARF unit at offset 0x0000000c contains invalid abbreviation 2 at offset 0x00000018, valid abbreviations are 1, 5, 3-4
+
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj --defsym=FORMNO=0xdead \
+# RUN:   | llvm-dwarfdump - 2>&1 | FileCheck --check-prefix=FORMNO %s
+# FORMNO: warning: DWARF unit at offset 0x0000000c contains invalid FORM_* 0xdead at offset 0x00000018
+
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj --defsym=SHORTINITLEN=1 \
+# RUN:   | llvm-dwarfdump - 2>&1 | FileCheck --check-prefix=SHORTINITLEN %s
+# SHORTINITLEN:      warning: DWARF unit at 0x0000002c cannot be parsed:
+# SHORTINITLEN-NEXT: warning: unexpected end of data at offset 0x2d while reading [0x2c, 0x30)
+
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj --defsym=BADTYPEUNIT=1 \
+# RUN:   | llvm-dwarfdump - 2>&1 | FileCheck --check-prefix=BADTYPEUNITBEFORE %s
+# BADTYPEUNITBEFORE: warning: DWARF type unit at offset 0x0000002c has its relocated type_offset 0x0000002d pointing inside the header
+
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj --defsym=BADTYPEUNIT=0x100 \
+# RUN:   | llvm-dwarfdump - 2>&1 | FileCheck --check-prefix=BADTYPEUNITAFTER %s
+# BADTYPEUNITAFTER: warning: DWARF type unit from offset 0x0000002c incl. to offset 0x00000045 excl. has its relocated type_offset 0x0000012c pointing past the unit end
+
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj --defsym=TOOLONG=1 \
+# RUN:   | llvm-dwarfdump - 2>&1 | FileCheck --check-prefix=TOOLONG %s
+# TOOLONG: warning: DWARF unit from offset 0x0000000c incl. to offset 0x0000002d excl. extends past section size 0x0000002c
+
+        .section .debug_abbrev,"",@progbits
+.ifndef ABBREVSETINVALID
+        .uleb128 1                      # Abbreviation Code
+        .uleb128 17                     # DW_TAG_compile_unit
+        .uleb128 1                      # DW_CHILDREN_yes
+        .uleb128 37                     # DW_AT_producer
+.ifndef FORMNO
+        .uleb128 8                      # DW_FORM_string
+.else
+        .uleb128 FORMNO                 # Invalid DW_FORM_* under test
+.endif
+        .uleb128 0                      # end abbrev 1 DW_AT_*
+        .uleb128 0                      # end abbrev 1 DW_FORM_*
+        .uleb128 5                      # Abbreviation Code
+        .uleb128 10                     # DW_TAG_label
+        .uleb128 0                      # DW_CHILDREN_no
+        .uleb128 0                      # end abbrev 5 DW_AT_*
+        .uleb128 0                      # end abbrev 5 DW_FORM_*
+        .uleb128 3                      # Abbreviation Code
+        .uleb128 10                     # DW_TAG_label
+        .uleb128 0                      # DW_CHILDREN_no
+        .uleb128 0                      # end abbrev 3 DW_AT_*
+        .uleb128 0                      # end abbrev 3 DW_FORM_*
+        .uleb128 4                      # Abbreviation Code
+        .uleb128 10                     # DW_TAG_label
+        .uleb128 0                      # DW_CHILDREN_no
+        .uleb128 0                      # end abbrev 4 DW_AT_*
+        .uleb128 0                      # end abbrev 4 DW_FORM_*
+        .uleb128 0                      # end abbrevs section
+.endif
+
+        .section .debug_info,"",@progbits
+## The first CU only pushes the CU actually under test to a non-zero offset,
+## so that the offsets printed in the error messages are checked as well.
+        .long    .Lcu_endp-.Lcu_startp  # Length of Unit
+.Lcu_startp:
+        .short   4                      # DWARF version number
+        .long    .debug_abbrev          # Offset Into Abbrev. Section
+        .byte    8                      # Address Size (in bytes)
+        .uleb128 0                      # End Of Children Mark
+.Lcu_endp:
+
+.ifndef TOOLONG
+.equ TOOLONG, 0
+.endif
+        .long    .Lcu_end0-.Lcu_start0 + TOOLONG  # Length of Unit
+.Lcu_start0:
+        .short   4                      # DWARF version number
+        .long    .debug_abbrev          # Offset Into Abbrev. Section
+        .byte    8                      # Address Size (in bytes)
+.ifndef ABBREVNO
+        .uleb128 1                      # Abbrev [1] DW_TAG_compile_unit
+.else
+        .uleb128 ABBREVNO               # Abbreviation code under test
+.endif
+        .asciz  "hand-written DWARF"    # DW_AT_producer
+.ifndef CUEND
+        .uleb128 0                      # End Of Children Mark
+.endif
+.Lcu_end0:
+
+.ifdef SHORTINITLEN
+        .byte    0x55                   # Too short Length of Unit
+.endif
+.ifdef BADTYPEUNIT
+        .long    .Lcu_end1-.Lcu_start1  # Length of Unit
+.Lcu_start1:    
+        .short   5                      # DWARF version number
+        .byte    2                      # DW_UT_type
+        .byte    8                      # Address Size (in bytes)
+        .long    .debug_abbrev          # Offset Into Abbrev. Section
+        .quad    0xbaddefacedfacade     # Type Signature
+        .long    BADTYPEUNIT            # Type DIE Offset
+        .uleb128 0                      # End Of Children Mark
+.Lcu_end1:
+.endif


        


More information about the llvm-commits mailing list