[llvm] r370363 - DWARFDebugLoc: Make parsing and error reporting more robust

Pavel Labath via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 29 07:26:05 PDT 2019


Author: labath
Date: Thu Aug 29 07:26:05 2019
New Revision: 370363

URL: http://llvm.org/viewvc/llvm-project?rev=370363&view=rev
Log:
DWARFDebugLoc: Make parsing and error reporting more robust

Summary:
While examining this class for possible use in lldb, I noticed two
things:
- it spits out parsing errors directly to stderr
- the loclists parser can incorrectly return valid location lists when
  parsing malformed (truncated) data

I improve the stderr situation by making the parseOneLocationList
functions return Expected<T>s. The errors are still dumped to stderr by
their callers, so this is only a partial fix, but it is enough for my
use case, as I intend to parse the locations lists one by one.

I fix the behavior in the truncated scenario by using the newly
introduced DataExtractor Cursor API.

I also add tests for handling the error cases, as they currently have no
coverage.

Reviewers: dblaikie, JDevlieghere, probinson

Subscribers: lldb-commits, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D63591

Added:
    llvm/trunk/test/DebugInfo/X86/dwarfdump-debug-loc-error-cases.s
    llvm/trunk/test/DebugInfo/X86/dwarfdump-debug-loclists-error-cases.s
Modified:
    llvm/trunk/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
    llvm/trunk/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
    llvm/trunk/lib/DebugInfo/DWARF/DWARFVerifier.cpp

Modified: llvm/trunk/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h?rev=370363&r1=370362&r2=370363&view=diff
==============================================================================
--- llvm/trunk/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h (original)
+++ llvm/trunk/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h Thu Aug 29 07:26:05 2019
@@ -29,7 +29,7 @@ public:
     /// The ending address of the instruction range.
     uint64_t End;
     /// The location of the variable within the specified range.
-    SmallString<4> Loc;
+    SmallVector<uint8_t, 4> Loc;
   };
 
   /// A list of locations that contain one variable.
@@ -68,8 +68,8 @@ public:
   /// Return the location list at the given offset or nullptr.
   LocationList const *getLocationListAtOffset(uint64_t Offset) const;
 
-  Optional<LocationList> parseOneLocationList(DWARFDataExtractor Data,
-                                              uint64_t *Offset);
+  static Expected<LocationList>
+  parseOneLocationList(const DWARFDataExtractor &Data, uint64_t *Offset);
 };
 
 class DWARFDebugLoclists {
@@ -78,7 +78,7 @@ public:
     uint8_t Kind;
     uint64_t Value0;
     uint64_t Value1;
-    SmallVector<char, 4> Loc;
+    SmallVector<uint8_t, 4> Loc;
   };
 
   struct LocationList {
@@ -106,8 +106,9 @@ public:
   /// Return the location list at the given offset or nullptr.
   LocationList const *getLocationListAtOffset(uint64_t Offset) const;
 
-  static Optional<LocationList>
-  parseOneLocationList(DataExtractor Data, uint64_t *Offset, unsigned Version);
+  static Expected<LocationList> parseOneLocationList(const DataExtractor &Data,
+                                                     uint64_t *Offset,
+                                                     unsigned Version);
 };
 
 } // end namespace llvm

Modified: llvm/trunk/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp?rev=370363&r1=370362&r2=370363&view=diff
==============================================================================
--- llvm/trunk/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp (original)
+++ llvm/trunk/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp Thu Aug 29 07:26:05 2019
@@ -28,11 +28,10 @@ using namespace llvm;
 // expression that LLVM doesn't produce. Guessing the wrong version means we
 // won't be able to pretty print expressions in DWARF2 binaries produced by
 // non-LLVM tools.
-static void dumpExpression(raw_ostream &OS, ArrayRef<char> Data,
+static void dumpExpression(raw_ostream &OS, ArrayRef<uint8_t> Data,
                            bool IsLittleEndian, unsigned AddressSize,
                            const MCRegisterInfo *MRI, DWARFUnit *U) {
-  DWARFDataExtractor Extractor(StringRef(Data.data(), Data.size()),
-                               IsLittleEndian, AddressSize);
+  DWARFDataExtractor Extractor(toStringRef(Data), IsLittleEndian, AddressSize);
   DWARFExpression(Extractor, dwarf::DWARF_VERSION, AddressSize).print(OS, MRI, U);
 }
 
@@ -83,47 +82,37 @@ void DWARFDebugLoc::dump(raw_ostream &OS
   }
 }
 
-Optional<DWARFDebugLoc::LocationList>
-DWARFDebugLoc::parseOneLocationList(DWARFDataExtractor Data, uint64_t *Offset) {
+Expected<DWARFDebugLoc::LocationList>
+DWARFDebugLoc::parseOneLocationList(const DWARFDataExtractor &Data,
+                                    uint64_t *Offset) {
   LocationList LL;
   LL.Offset = *Offset;
+  DataExtractor::Cursor C(*Offset);
 
   // 2.6.2 Location Lists
   // A location list entry consists of:
   while (true) {
     Entry E;
-    if (!Data.isValidOffsetForDataOfSize(*Offset, 2 * Data.getAddressSize())) {
-      WithColor::error() << "location list overflows the debug_loc section.\n";
-      return None;
-    }
 
     // 1. A beginning address offset. ...
-    E.Begin = Data.getRelocatedAddress(Offset);
+    E.Begin = Data.getRelocatedAddress(C);
 
     // 2. An ending address offset. ...
-    E.End = Data.getRelocatedAddress(Offset);
+    E.End = Data.getRelocatedAddress(C);
 
+    if (Error Err = C.takeError())
+      return std::move(Err);
     // The end of any given location list is marked by an end of list entry,
     // which consists of a 0 for the beginning address offset and a 0 for the
     // ending address offset.
-    if (E.Begin == 0 && E.End == 0)
+    if (E.Begin == 0 && E.End == 0) {
+      *Offset = C.tell();
       return LL;
-
-    if (!Data.isValidOffsetForDataOfSize(*Offset, 2)) {
-      WithColor::error() << "location list overflows the debug_loc section.\n";
-      return None;
     }
 
-    unsigned Bytes = Data.getU16(Offset);
-    if (!Data.isValidOffsetForDataOfSize(*Offset, Bytes)) {
-      WithColor::error() << "location list overflows the debug_loc section.\n";
-      return None;
-    }
+    unsigned Bytes = Data.getU16(C);
     // A single location description describing the location of the object...
-    StringRef str = Data.getData().substr(*Offset, Bytes);
-    *Offset += Bytes;
-    E.Loc.reserve(str.size());
-    llvm::copy(str, std::back_inserter(E.Loc));
+    Data.getU8(C, E.Loc, Bytes);
     LL.Entries.push_back(std::move(E));
   }
 }
@@ -133,67 +122,65 @@ void DWARFDebugLoc::parse(const DWARFDat
   AddressSize = data.getAddressSize();
 
   uint64_t Offset = 0;
-  while (data.isValidOffset(Offset + data.getAddressSize() - 1)) {
+  while (Offset < data.getData().size()) {
     if (auto LL = parseOneLocationList(data, &Offset))
       Locations.push_back(std::move(*LL));
-    else
+    else {
+      logAllUnhandledErrors(LL.takeError(), WithColor::error());
       break;
+    }
   }
-  if (data.isValidOffset(Offset))
-    WithColor::error() << "failed to consume entire .debug_loc section\n";
 }
 
-Optional<DWARFDebugLoclists::LocationList>
-DWARFDebugLoclists::parseOneLocationList(DataExtractor Data, uint64_t *Offset,
-                                         unsigned Version) {
+Expected<DWARFDebugLoclists::LocationList>
+DWARFDebugLoclists::parseOneLocationList(const DataExtractor &Data,
+                                         uint64_t *Offset, unsigned Version) {
   LocationList LL;
   LL.Offset = *Offset;
+  DataExtractor::Cursor C(*Offset);
 
   // dwarf::DW_LLE_end_of_list_entry is 0 and indicates the end of the list.
-  while (auto Kind =
-             static_cast<dwarf::LocationListEntry>(Data.getU8(Offset))) {
-
+  while (auto Kind = static_cast<dwarf::LocationListEntry>(Data.getU8(C))) {
     Entry E;
     E.Kind = Kind;
     switch (Kind) {
     case dwarf::DW_LLE_startx_length:
-      E.Value0 = Data.getULEB128(Offset);
+      E.Value0 = Data.getULEB128(C);
       // Pre-DWARF 5 has different interpretation of the length field. We have
       // to support both pre- and standartized styles for the compatibility.
       if (Version < 5)
-        E.Value1 = Data.getU32(Offset);
+        E.Value1 = Data.getU32(C);
       else
-        E.Value1 = Data.getULEB128(Offset);
+        E.Value1 = Data.getULEB128(C);
       break;
     case dwarf::DW_LLE_start_length:
-      E.Value0 = Data.getAddress(Offset);
-      E.Value1 = Data.getULEB128(Offset);
+      E.Value0 = Data.getAddress(C);
+      E.Value1 = Data.getULEB128(C);
       break;
     case dwarf::DW_LLE_offset_pair:
-      E.Value0 = Data.getULEB128(Offset);
-      E.Value1 = Data.getULEB128(Offset);
+      E.Value0 = Data.getULEB128(C);
+      E.Value1 = Data.getULEB128(C);
       break;
     case dwarf::DW_LLE_base_address:
-      E.Value0 = Data.getAddress(Offset);
+      E.Value0 = Data.getAddress(C);
       break;
     default:
-      WithColor::error() << "dumping support for LLE of kind " << (int)Kind
-                         << " not implemented\n";
-      return None;
+      cantFail(C.takeError());
+      return createStringError(errc::illegal_byte_sequence,
+                               "LLE of kind %x not supported", (int)Kind);
     }
 
     if (Kind != dwarf::DW_LLE_base_address) {
-      unsigned Bytes =
-          Version >= 5 ? Data.getULEB128(Offset) : Data.getU16(Offset);
+      unsigned Bytes = Version >= 5 ? Data.getULEB128(C) : Data.getU16(C);
       // A single location description describing the location of the object...
-      StringRef str = Data.getData().substr(*Offset, Bytes);
-      *Offset += Bytes;
-      E.Loc.resize(str.size());
-      llvm::copy(str, E.Loc.begin());
+      Data.getU8(C, E.Loc, Bytes);
     }
 
     LL.Entries.push_back(std::move(E));
   }
+  if (Error Err = C.takeError())
+    return std::move(Err);
+  *Offset = C.tell();
   return LL;
 }
 
@@ -202,11 +189,13 @@ void DWARFDebugLoclists::parse(DataExtra
   AddressSize = data.getAddressSize();
 
   uint64_t Offset = 0;
-  while (data.isValidOffset(Offset)) {
+  while (Offset < data.getData().size()) {
     if (auto LL = parseOneLocationList(data, &Offset, Version))
       Locations.push_back(std::move(*LL));
-    else
+    else {
+      logAllUnhandledErrors(LL.takeError(), WithColor::error());
       return;
+    }
   }
 }
 

Modified: llvm/trunk/lib/DebugInfo/DWARF/DWARFVerifier.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/DebugInfo/DWARF/DWARFVerifier.cpp?rev=370363&r1=370362&r2=370363&view=diff
==============================================================================
--- llvm/trunk/lib/DebugInfo/DWARF/DWARFVerifier.cpp (original)
+++ llvm/trunk/lib/DebugInfo/DWARF/DWARFVerifier.cpp Thu Aug 29 07:26:05 2019
@@ -466,9 +466,9 @@ unsigned DWARFVerifier::verifyDebugInfoA
     ReportError("DIE has invalid DW_AT_stmt_list encoding:");
     break;
   case DW_AT_location: {
-    auto VerifyLocationExpr = [&](StringRef D) {
+    auto VerifyLocationExpr = [&](ArrayRef<uint8_t> D) {
       DWARFUnit *U = Die.getDwarfUnit();
-      DataExtractor Data(D, DCtx.isLittleEndian(), 0);
+      DataExtractor Data(toStringRef(D), DCtx.isLittleEndian(), 0);
       DWARFExpression Expression(Data, U->getVersion(),
                                  U->getAddressByteSize());
       bool Error = llvm::any_of(Expression, [](DWARFExpression::Operation &Op) {
@@ -479,7 +479,7 @@ unsigned DWARFVerifier::verifyDebugInfoA
     };
     if (Optional<ArrayRef<uint8_t>> Expr = AttrValue.Value.getAsBlock()) {
       // Verify inlined location.
-      VerifyLocationExpr(llvm::toStringRef(*Expr));
+      VerifyLocationExpr(*Expr);
     } else if (auto LocOffset = AttrValue.Value.getAsSectionOffset()) {
       // Verify location list.
       if (auto DebugLoc = DCtx.getDebugLoc())
@@ -1277,9 +1277,9 @@ static bool isVariableIndexable(const DW
   if (!Location)
     return false;
 
-  auto ContainsInterestingOperators = [&](StringRef D) {
+  auto ContainsInterestingOperators = [&](ArrayRef<uint8_t> D) {
     DWARFUnit *U = Die.getDwarfUnit();
-    DataExtractor Data(D, DCtx.isLittleEndian(), U->getAddressByteSize());
+    DataExtractor Data(toStringRef(D), DCtx.isLittleEndian(), U->getAddressByteSize());
     DWARFExpression Expression(Data, U->getVersion(), U->getAddressByteSize());
     return any_of(Expression, [](DWARFExpression::Operation &Op) {
       return !Op.isError() && (Op.getCode() == DW_OP_addr ||
@@ -1290,7 +1290,7 @@ static bool isVariableIndexable(const DW
 
   if (Optional<ArrayRef<uint8_t>> Expr = Location->getAsBlock()) {
     // Inlined location.
-    if (ContainsInterestingOperators(toStringRef(*Expr)))
+    if (ContainsInterestingOperators(*Expr))
       return true;
   } else if (Optional<uint64_t> Offset = Location->getAsSectionOffset()) {
     // Location list.

Added: llvm/trunk/test/DebugInfo/X86/dwarfdump-debug-loc-error-cases.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/DebugInfo/X86/dwarfdump-debug-loc-error-cases.s?rev=370363&view=auto
==============================================================================
--- llvm/trunk/test/DebugInfo/X86/dwarfdump-debug-loc-error-cases.s (added)
+++ llvm/trunk/test/DebugInfo/X86/dwarfdump-debug-loc-error-cases.s Thu Aug 29 07:26:05 2019
@@ -0,0 +1,58 @@
+# RUN: llvm-mc %s -filetype obj -triple x86_64-pc-linux --defsym CASE1=0 -o %t1.o
+# RUN: llvm-dwarfdump -debug-loc %t1.o 2>&1 | FileCheck %s
+
+# RUN: llvm-mc %s -filetype obj -triple x86_64-pc-linux --defsym CASE2=0 -o %t2.o
+# RUN: llvm-dwarfdump -debug-loc %t2.o 2>&1 | FileCheck %s
+
+# RUN: llvm-mc %s -filetype obj -triple x86_64-pc-linux --defsym CASE3=0 -o %t3.o
+# RUN: llvm-dwarfdump -debug-loc %t3.o 2>&1 | FileCheck %s
+
+# RUN: llvm-mc %s -filetype obj -triple x86_64-pc-linux --defsym CASE4=0 -o %t4.o
+# RUN: llvm-dwarfdump -debug-loc %t4.o 2>&1 | FileCheck %s
+
+# RUN: llvm-mc %s -filetype obj -triple x86_64-pc-linux --defsym CASE5=0 -o %t5.o
+# RUN: llvm-dwarfdump -debug-loc %t5.o 2>&1 | FileCheck %s
+
+# RUN: llvm-mc %s -filetype obj -triple x86_64-pc-linux --defsym CASE6=0 -o %t6.o
+# RUN: llvm-dwarfdump -debug-loc %t6.o 2>&1 | FileCheck %s
+
+# CHECK: error: unexpected end of data
+
+.section  .debug_loc,"", at progbits
+.ifdef CASE1
+  .byte  1                       # bogus
+.endif
+.ifdef CASE2
+  .long  0                       # starting offset
+.endif
+.ifdef CASE3
+  .long  0                       # starting offset
+  .long  1                       # ending offset
+.endif
+.ifdef CASE4
+  .long  0                       # starting offset
+  .long  1                       # ending offset
+  .word  0                       # Loc expr size
+.endif
+.ifdef CASE5
+  .long  0                       # starting offset
+  .long  1                       # ending offset
+  .word  0                       # Loc expr size
+  .long  0                       # starting offset
+.endif
+.ifdef CASE6
+  .long  0                       # starting offset
+  .long  1                       # ending offset
+  .word  0xffff                  # Loc expr size
+.endif
+
+# A minimal compile unit is needed to deduce the address size of the location
+# lists
+.section  .debug_info,"", at progbits
+  .long  .Lcu_end0-.Lcu_begin0   # Length of Unit
+.Lcu_begin0:
+  .short  4                      # DWARF version number
+  .long  0                       # Offset Into Abbrev. Section
+  .byte  8                       # Address Size (in bytes)
+  .byte  0                       # End Of Children Mark
+.Lcu_end0:

Added: llvm/trunk/test/DebugInfo/X86/dwarfdump-debug-loclists-error-cases.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/DebugInfo/X86/dwarfdump-debug-loclists-error-cases.s?rev=370363&view=auto
==============================================================================
--- llvm/trunk/test/DebugInfo/X86/dwarfdump-debug-loclists-error-cases.s (added)
+++ llvm/trunk/test/DebugInfo/X86/dwarfdump-debug-loclists-error-cases.s Thu Aug 29 07:26:05 2019
@@ -0,0 +1,71 @@
+# RUN: llvm-mc %s -filetype obj -triple x86_64-pc-linux --defsym CASE1=0 -o %t1.o
+# RUN: llvm-dwarfdump -debug-loclists %t1.o 2>&1 | FileCheck %s --check-prefix=ULEB
+
+# RUN: llvm-mc %s -filetype obj -triple x86_64-pc-linux --defsym CASE2=0 -o %t2.o
+# RUN: llvm-dwarfdump -debug-loclists %t2.o 2>&1 | FileCheck %s --check-prefix=ULEB
+
+# RUN: llvm-mc %s -filetype obj -triple x86_64-pc-linux --defsym CASE3=0 -o %t3.o
+# RUN: llvm-dwarfdump -debug-loclists %t3.o 2>&1 | FileCheck %s --check-prefix=ULEB
+
+# RUN: llvm-mc %s -filetype obj -triple x86_64-pc-linux --defsym CASE4=0 -o %t4.o
+# RUN: llvm-dwarfdump -debug-loclists %t4.o 2>&1 | FileCheck %s
+
+# RUN: llvm-mc %s -filetype obj -triple x86_64-pc-linux --defsym CASE5=0 -o %t5.o
+# RUN: llvm-dwarfdump -debug-loclists %t5.o 2>&1 | FileCheck %s
+
+# RUN: llvm-mc %s -filetype obj -triple x86_64-pc-linux --defsym CASE6=0 -o %t6.o
+# RUN: llvm-dwarfdump -debug-loclists %t6.o 2>&1 | FileCheck %s
+
+# RUN: llvm-mc %s -filetype obj -triple x86_64-pc-linux --defsym CASE7=0 -o %t7.o
+# RUN: llvm-dwarfdump -debug-loclists %t7.o 2>&1 | FileCheck %s --check-prefix=UNIMPL
+
+# CHECK: error: unexpected end of data
+# ULEB: error: malformed uleb128, extends past end
+# UNIMPL: error: LLE of kind 47 not supported
+
+.section  .debug_loclists,"", at progbits
+  .long  .Ldebug_loclist_table_end0-.Ldebug_loclist_table_start0
+.Ldebug_loclist_table_start0:
+ .short 5                        # Version.
+ .byte 8                         # Address size.
+ .byte 0                         # Segment selector size.
+ .long 0                         # Offset entry count.
+.Lloclists_table_base0:
+.Ldebug_loc0:
+.ifdef CASE1
+  .byte  4                       # DW_LLE_offset_pair
+.endif
+.ifdef CASE2
+  .byte  4                       # DW_LLE_offset_pair
+  .uleb128 0x0                   #   starting offset
+.endif
+.ifdef CASE3
+  .byte  4                       # DW_LLE_offset_pair
+  .uleb128 0x0                   #   starting offset
+  .uleb128 0x10                  #   ending offset
+.endif
+.ifdef CASE4
+  .byte  4                       # DW_LLE_offset_pair
+  .uleb128 0x0                   #   starting offset
+  .uleb128 0x10                  #   ending offset
+  .byte  1                       # Loc expr size
+.endif
+.ifdef CASE5
+  .byte  4                       # DW_LLE_offset_pair
+  .uleb128 0x0                   #   starting offset
+  .uleb128 0x10                  #   ending offset
+  .byte  1                       # Loc expr size
+  .byte  117                     # DW_OP_breg5
+.endif
+.ifdef CASE6
+  .byte  4                       # DW_LLE_offset_pair
+  .uleb128 0x0                   #   starting offset
+  .uleb128 0x10                  #   ending offset
+  .uleb128 0xdeadbeef            # Loc expr size
+.endif
+.ifdef CASE7
+  .byte 0x47
+.endif
+
+.Ldebug_loclist_table_end0:
+




More information about the llvm-commits mailing list