[llvm] 51b4610 - Support DW_FORM_strx* in llvm-dwp.

David Blaikie via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 26 12:32:57 PDT 2021


Author: Ali Tamur
Date: 2021-04-26T12:32:45-07:00
New Revision: 51b461074385aa8cc141809fa27070544b6ac34c

URL: https://github.com/llvm/llvm-project/commit/51b461074385aa8cc141809fa27070544b6ac34c
DIFF: https://github.com/llvm/llvm-project/commit/51b461074385aa8cc141809fa27070544b6ac34c.diff

LOG: Support DW_FORM_strx* in llvm-dwp.

Currently llvm-dwp only handled DW_FORM_string and DW_FORM_GNU_str_index; with this patch it also starts to handle DW_FORM_strx[1-4]?

Reviewed By: dblaikie

Differential Revision: https://reviews.llvm.org/D75485

Added: 
    llvm/test/tools/llvm-dwp/Inputs/handle_strx/dw5.dwo
    llvm/test/tools/llvm-dwp/X86/handle_strx.test
    llvm/test/tools/llvm-dwp/X86/invalid_cu_header_length.s
    llvm/test/tools/llvm-dwp/X86/invalid_cu_header_length_type.s
    llvm/test/tools/llvm-dwp/X86/invalid_cu_header_version.s

Modified: 
    llvm/test/tools/llvm-dwp/X86/invalid_string_form.test
    llvm/test/tools/llvm-dwp/X86/unsupported_cu_index_version.s
    llvm/tools/llvm-dwp/llvm-dwp.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/test/tools/llvm-dwp/Inputs/handle_strx/dw5.dwo b/llvm/test/tools/llvm-dwp/Inputs/handle_strx/dw5.dwo
new file mode 100644
index 0000000000000..c2f287c728782
Binary files /dev/null and b/llvm/test/tools/llvm-dwp/Inputs/handle_strx/dw5.dwo 
diff er

diff  --git a/llvm/test/tools/llvm-dwp/X86/handle_strx.test b/llvm/test/tools/llvm-dwp/X86/handle_strx.test
new file mode 100644
index 0000000000000..8fd6fe5b725f3
--- /dev/null
+++ b/llvm/test/tools/llvm-dwp/X86/handle_strx.test
@@ -0,0 +1,14 @@
+RUN: llvm-dwp %p/../Inputs/handle_strx/dw5.dwo -o %t 2>/dev/null
+RUN: llvm-dwarfdump --verbose %t 2>/dev/null | FileCheck --check-prefix=READ_STRX %s
+
+RUN: not llvm-dwp %p/../Inputs/handle_strx/dw5.dwo %p/../Inputs/handle_strx/dw5.dwo -o %t 2>&1 \
+RUN:   | FileCheck --check-prefix=PARSE_STRX %s
+
+
+dw5.o is compiled from a file that contains a single empty void function,
+with options -gdwarf-5 and -gsplit-dwarf.
+
+READ_STRX: DW_AT_name [DW_FORM_strx1]{{.*}}dw5.cc
+
+PARSE_STRX: error: duplicate DWO ID ({{.*}}) in 'dw5.cc' and 'dw5.cc'
+

diff  --git a/llvm/test/tools/llvm-dwp/X86/invalid_cu_header_length.s b/llvm/test/tools/llvm-dwp/X86/invalid_cu_header_length.s
new file mode 100644
index 0000000000000..8f32822eebe65
--- /dev/null
+++ b/llvm/test/tools/llvm-dwp/X86/invalid_cu_header_length.s
@@ -0,0 +1,8 @@
+# RUN: llvm-mc -triple x86_64-unknown-linux %s -filetype=obj -o %t.o \
+# RUN:         -split-dwarf-file=%t.dwo -dwarf-version=5
+# RUN: not llvm-dwp %t.dwo -o %t.dwp 2>&1 | FileCheck %s
+
+# CHECK: error: compile unit exceeds .debug_info section range: 20 >= 6
+    .section	.debug_info.dwo,"e", at progbits
+	  .long	16      # Length of Unit
+    .short 5      # Version
\ No newline at end of file

diff  --git a/llvm/test/tools/llvm-dwp/X86/invalid_cu_header_length_type.s b/llvm/test/tools/llvm-dwp/X86/invalid_cu_header_length_type.s
new file mode 100644
index 0000000000000..429ae01422c87
--- /dev/null
+++ b/llvm/test/tools/llvm-dwp/X86/invalid_cu_header_length_type.s
@@ -0,0 +1,7 @@
+# RUN: llvm-mc -triple x86_64-unknown-linux %s -filetype=obj -o %t.o \
+# RUN:         -split-dwarf-file=%t.dwo -dwarf-version=5
+# RUN: not llvm-dwp %t.dwo -o %t.dwp 2>&1 | FileCheck %s
+
+# CHECK: error: cannot parse compile unit length: unexpected end of data at offset 0x2 while reading [0x0, 0x4
+    .section	.debug_info.dwo,"e", at progbits
+    .short	0 # Length of Unit

diff  --git a/llvm/test/tools/llvm-dwp/X86/invalid_cu_header_version.s b/llvm/test/tools/llvm-dwp/X86/invalid_cu_header_version.s
new file mode 100644
index 0000000000000..bb55e22abb4d0
--- /dev/null
+++ b/llvm/test/tools/llvm-dwp/X86/invalid_cu_header_version.s
@@ -0,0 +1,7 @@
+# RUN: llvm-mc -triple x86_64-unknown-linux %s -filetype=obj -o %t.o \
+# RUN:         -split-dwarf-file=%t.dwo -dwarf-version=5
+# RUN: not llvm-dwp %t.dwo -o %t.dwp 2>&1 | FileCheck %s
+
+# CHECK: error: cannot parse compile unit version: unexpected end of data at offset 0x4 while reading [0x4, 0x6)
+    .section	.debug_info.dwo,"e", at progbits
+    .long	0 # Length of Unit

diff  --git a/llvm/test/tools/llvm-dwp/X86/invalid_string_form.test b/llvm/test/tools/llvm-dwp/X86/invalid_string_form.test
index 2d3cf6fe28c6c..e17c5a51a999c 100644
--- a/llvm/test/tools/llvm-dwp/X86/invalid_string_form.test
+++ b/llvm/test/tools/llvm-dwp/X86/invalid_string_form.test
@@ -1,3 +1,4 @@
 RUN: not llvm-dwp %p/../Inputs/invalid_string_form.dwo -o %t 2>&1 | FileCheck %s
 
-CHECK: error: {{.*}}invalid_string_form.dwo': string field encoded without DW_FORM_string or DW_FORM_GNU_str_index
+CHECK: error: {{.*}}: string field must be encoded with one of the following:
+CHECK: DW_FORM_string, DW_FORM_strx, DW_FORM_strx1, DW_FORM_strx2, DW_FORM_strx3, DW_FORM_strx4, or DW_FORM_GNU_str_index.

diff  --git a/llvm/test/tools/llvm-dwp/X86/unsupported_cu_index_version.s b/llvm/test/tools/llvm-dwp/X86/unsupported_cu_index_version.s
index 84850fcc3aff1..fb4a30b13a4b2 100644
--- a/llvm/test/tools/llvm-dwp/X86/unsupported_cu_index_version.s
+++ b/llvm/test/tools/llvm-dwp/X86/unsupported_cu_index_version.s
@@ -2,12 +2,15 @@
 # RUN: not llvm-dwp %t.dwp -o %t 2>&1 | FileCheck %s
 
 # CHECK: error: unsupported cu_index version: 5 (only version 2 is supported)
-
-## To reach the test point, no real data is needed in .debug_info.dwo,
-## but the section should not be empty.
     .section .debug_info.dwo, "e", @progbits
-    .space 1
-
+    .long	.Ldebug_info_dwo_end0-.Ldebug_info_dwo_start0 # Length of Unit
+.Ldebug_info_dwo_start0:
+    .short 5                       # DWARF version number
+    .byte 5                        # DWARF Unit type
+    .byte 8                        # Address Size (in bytes)
+    .long 0                        # Offset Into Abbrev. Section
+    .quad	-346972125991005518
+.Ldebug_info_dwo_end0:
     .section .debug_cu_index, "", @progbits
 ## Header:
     .short 5                        # Version

diff  --git a/llvm/tools/llvm-dwp/llvm-dwp.cpp b/llvm/tools/llvm-dwp/llvm-dwp.cpp
index d495bd3d4cab2..5cbe1a34392cf 100644
--- a/llvm/tools/llvm-dwp/llvm-dwp.cpp
+++ b/llvm/tools/llvm-dwp/llvm-dwp.cpp
@@ -16,6 +16,7 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
 #include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
 #include "llvm/MC/MCAsmBackend.h"
@@ -64,10 +65,107 @@ static cl::opt<std::string> OutputFilename(cl::Required, "o",
                                            cl::value_desc("filename"),
                                            cl::cat(DwpCategory));
 
+// Returns the size of debug_str_offsets section headers in bytes.
+static uint64_t debugStrOffsetsHeaderSize(DataExtractor StrOffsetsData,
+                                          uint16_t DwarfVersion) {
+  if (DwarfVersion <= 4)
+    return 0; // There is no header before dwarf 5.
+  uint64_t Offset = 0;
+  uint64_t Length = StrOffsetsData.getU32(&Offset);
+  if (Length == llvm::dwarf::DW_LENGTH_DWARF64)
+    return 16; // unit length: 12 bytes, version: 2 bytes, padding: 2 bytes.
+  return 8;    // unit length: 4 bytes, version: 2 bytes, padding: 2 bytes.
+}
+
+// Holds data for Skeleton and Split Compilation Unit Headers as defined in
+// Dwarf 5 specification, 7.5.1.2 and Dwarf 4 specification 7.5.1.1.
+struct CompileUnitHeader {
+  // unit_length field. Note that the type is uint64_t even in 32-bit dwarf.
+  uint64_t Length = 0;
+
+  // version field.
+  uint16_t Version = 0;
+
+  // unit_type field. Initialized only if Version >= 5.
+  uint8_t UnitType = 0;
+
+  // address_size field.
+  uint8_t AddrSize = 0;
+
+  // debug_abbrev_offset field. Note that the type is uint64_t even in 32-bit
+  // dwarf. It is assumed to be 0.
+  uint64_t DebugAbbrevOffset = 0;
+
+  // dwo_id field. This resides in the header only if Version >= 5.
+  // In earlier versions, it is read from DW_AT_GNU_dwo_id.
+  Optional<uint64_t> Signature = None;
+
+  // Derived from the length of Length field.
+  dwarf::DwarfFormat Format = dwarf::DwarfFormat::DWARF32;
+
+  // The size of the Header in bytes. This is derived while parsing the header,
+  // and is stored as a convenience.
+  uint8_t HeaderSize = 0;
+};
+
+// Parse and return the header of the compile unit.
+static Expected<CompileUnitHeader> parseCompileUnitHeader(StringRef Info) {
+  CompileUnitHeader Header;
+  Error Err = Error::success();
+  uint64_t Offset = 0;
+  DWARFDataExtractor InfoData(Info, true, 0);
+  std::tie(Header.Length, Header.Format) =
+      InfoData.getInitialLength(&Offset, &Err);
+  if (Err)
+    return make_error<DWPError>("cannot parse compile unit length: " +
+                                llvm::toString(std::move(Err)));
+
+  if (!InfoData.isValidOffset(Offset + (Header.Length - 1))) {
+    return make_error<DWPError>(
+        "compile unit exceeds .debug_info section range: " +
+        utostr(Offset + Header.Length) + " >= " + utostr(InfoData.size()));
+  }
+
+  Header.Version = InfoData.getU16(&Offset, &Err);
+  if (Err)
+    return make_error<DWPError>("cannot parse compile unit version: " +
+                                llvm::toString(std::move(Err)));
+
+  uint64_t MinHeaderLength;
+  if (Header.Version >= 5) {
+    // Size: Version (2), UnitType (1), AddrSize (1), DebugAbbrevOffset (4),
+    // Signature (8)
+    MinHeaderLength = 16;
+  } else {
+    // Size: Version (2), DebugAbbrevOffset (4), AddrSize (1)
+    MinHeaderLength = 7;
+  }
+  if (Header.Length < MinHeaderLength) {
+    return make_error<DWPError>(
+        "compile unit length is too small: expected at least " +
+        utostr(MinHeaderLength) + " got " + utostr(Header.Length) + ".");
+  }
+  if (Header.Version >= 5) {
+    Header.UnitType = InfoData.getU8(&Offset);
+    Header.AddrSize = InfoData.getU8(&Offset);
+    Header.DebugAbbrevOffset = InfoData.getU32(&Offset);
+    Header.Signature = InfoData.getU64(&Offset);
+  } else {
+    // Note that, address_size and debug_abbrev_offset fields have switched
+    // places between dwarf version 4 and 5.
+    Header.DebugAbbrevOffset = InfoData.getU32(&Offset);
+    Header.AddrSize = InfoData.getU8(&Offset);
+  }
+
+  Header.HeaderSize = Offset;
+  return Header;
+}
+
 static void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings,
                                    MCSection *StrOffsetSection,
                                    StringRef CurStrSection,
-                                   StringRef CurStrOffsetSection) {
+                                   StringRef CurStrOffsetSection,
+                                   const CompileUnitHeader &Header) {
   // Could possibly produce an error or warning if one of these was non-null but
   // the other was null.
   if (CurStrSection.empty() || CurStrOffsetSection.empty())
@@ -88,8 +186,13 @@ static void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings,
 
   Out.SwitchSection(StrOffsetSection);
 
+  uint64_t HeaderSize = debugStrOffsetsHeaderSize(Data, Header.Version);
   uint64_t Offset = 0;
   uint64_t Size = CurStrOffsetSection.size();
+  // FIXME: This can be caused by bad input and should be handled as such.
+  assert(HeaderSize <= Size && "StrOffsetSection size is less than its header");
+  // Copy the header to the output.
+  Out.emitBytes(Data.getBytes(&Offset, HeaderSize));
   while (Offset < Size) {
     auto OldOffset = Data.getU32(&Offset);
     auto NewOffset = OffsetRemapping[OldOffset];
@@ -120,16 +223,38 @@ struct CompileUnitIdentifiers {
 };
 
 static Expected<const char *>
-getIndexedString(dwarf::Form Form, DataExtractor InfoData,
-                 uint64_t &InfoOffset, StringRef StrOffsets, StringRef Str) {
+getIndexedString(dwarf::Form Form, DataExtractor InfoData, uint64_t &InfoOffset,
+                 StringRef StrOffsets, StringRef Str, uint16_t Version) {
   if (Form == dwarf::DW_FORM_string)
     return InfoData.getCStr(&InfoOffset);
-  if (Form != dwarf::DW_FORM_GNU_str_index)
+  uint64_t StrIndex;
+  switch (Form) {
+  case dwarf::DW_FORM_strx1:
+    StrIndex = InfoData.getU8(&InfoOffset);
+    break;
+  case dwarf::DW_FORM_strx2:
+    StrIndex = InfoData.getU16(&InfoOffset);
+    break;
+  case dwarf::DW_FORM_strx3:
+    StrIndex = InfoData.getU24(&InfoOffset);
+    break;
+  case dwarf::DW_FORM_strx4:
+    StrIndex = InfoData.getU32(&InfoOffset);
+    break;
+  case dwarf::DW_FORM_strx:
+  case dwarf::DW_FORM_GNU_str_index:
+    StrIndex = InfoData.getULEB128(&InfoOffset);
+    break;
+  default:
     return make_error<DWPError>(
-        "string field encoded without DW_FORM_string or DW_FORM_GNU_str_index");
-  auto StrIndex = InfoData.getULEB128(&InfoOffset);
+        "string field must be encoded with one of the following: "
+        "DW_FORM_string, DW_FORM_strx, DW_FORM_strx1, DW_FORM_strx2, "
+        "DW_FORM_strx3, DW_FORM_strx4, or DW_FORM_GNU_str_index.");
+  }
   DataExtractor StrOffsetsData(StrOffsets, true, 0);
   uint64_t StrOffsetsOffset = 4 * StrIndex;
+  StrOffsetsOffset += debugStrOffsetsHeaderSize(StrOffsetsData, Version);
+
   uint64_t StrOffset = StrOffsetsData.getU32(&StrOffsetsOffset);
   DataExtractor StrData(Str, true, 0);
   return StrData.getCStr(&StrOffset);
@@ -139,33 +264,21 @@ static Expected<CompileUnitIdentifiers> getCUIdentifiers(StringRef Abbrev,
                                                          StringRef Info,
                                                          StringRef StrOffsets,
                                                          StringRef Str) {
-  uint64_t Offset = 0;
+  Expected<CompileUnitHeader> HeaderOrError = parseCompileUnitHeader(Info);
+  if (!HeaderOrError)
+    return HeaderOrError.takeError();
+  CompileUnitHeader &Header = *HeaderOrError;
   DataExtractor InfoData(Info, true, 0);
-  dwarf::DwarfFormat Format = dwarf::DwarfFormat::DWARF32;
-  uint64_t Length = InfoData.getU32(&Offset);
+  uint64_t Offset = Header.HeaderSize;
+  if (Header.Version >= 5 && Header.UnitType != dwarf::DW_UT_split_compile)
+    return make_error<DWPError>(
+        std::string("unit type DW_UT_split_compile type not found in "
+                    "debug_info header. Unexpected unit type 0x" +
+                    utostr(Header.UnitType) + " found"));
+
   CompileUnitIdentifiers ID;
-  Optional<uint64_t> Signature = None;
-  // If the length is 0xffffffff, then this indictes that this is a DWARF 64
-  // stream and the length is actually encoded into a 64 bit value that follows.
-  if (Length == 0xffffffffU) {
-    Format = dwarf::DwarfFormat::DWARF64;
-    Length = InfoData.getU64(&Offset);
-  }
-  uint16_t Version = InfoData.getU16(&Offset);
-  if (Version >= 5) {
-    auto UnitType = InfoData.getU8(&Offset);
-    if (UnitType != dwarf::DW_UT_split_compile)
-      return make_error<DWPError>(
-          std::string("unit type DW_UT_split_compile type not found in "
-                      "debug_info header. Unexpected unit type 0x" +
-                      utostr(UnitType) + " found"));
-  }
-  InfoData.getU32(&Offset); // Abbrev offset (should be zero)
-  uint8_t AddrSize = InfoData.getU8(&Offset);
-  if (Version >= 5)
-    Signature = InfoData.getU64(&Offset);
-  uint32_t AbbrCode = InfoData.getULEB128(&Offset);
 
+  uint32_t AbbrCode = InfoData.getULEB128(&Offset);
   DataExtractor AbbrevData(Abbrev, true, 0);
   uint64_t AbbrevOffset = getCUAbbrev(Abbrev, AbbrCode);
   auto Tag = static_cast<dwarf::Tag>(AbbrevData.getULEB128(&AbbrevOffset));
@@ -180,8 +293,8 @@ static Expected<CompileUnitIdentifiers> getCUIdentifiers(StringRef Abbrev,
          (Name != 0 || Form != 0)) {
     switch (Name) {
     case dwarf::DW_AT_name: {
-      Expected<const char *> EName =
-          getIndexedString(Form, InfoData, Offset, StrOffsets, Str);
+      Expected<const char *> EName = getIndexedString(
+          Form, InfoData, Offset, StrOffsets, Str, Header.Version);
       if (!EName)
         return EName.takeError();
       ID.Name = *EName;
@@ -189,24 +302,25 @@ static Expected<CompileUnitIdentifiers> getCUIdentifiers(StringRef Abbrev,
     }
     case dwarf::DW_AT_GNU_dwo_name:
     case dwarf::DW_AT_dwo_name: {
-      Expected<const char *> EName =
-          getIndexedString(Form, InfoData, Offset, StrOffsets, Str);
+      Expected<const char *> EName = getIndexedString(
+          Form, InfoData, Offset, StrOffsets, Str, Header.Version);
       if (!EName)
         return EName.takeError();
       ID.DWOName = *EName;
       break;
     }
     case dwarf::DW_AT_GNU_dwo_id:
-      Signature = InfoData.getU64(&Offset);
+      Header.Signature = InfoData.getU64(&Offset);
       break;
     default:
-      DWARFFormValue::skipValue(Form, InfoData, &Offset,
-                                dwarf::FormParams({Version, AddrSize, Format}));
+      DWARFFormValue::skipValue(
+          Form, InfoData, &Offset,
+          dwarf::FormParams({Header.Version, Header.AddrSize, Header.Format}));
     }
   }
-  if (!Signature)
+  if (!Header.Signature)
     return make_error<DWPError>("compile unit missing dwo_id");
-  ID.Signature = *Signature;
+  ID.Signature = *Header.Signature;
   return ID;
 }
 
@@ -596,8 +710,14 @@ static Error write(MCStreamer &Out, ArrayRef<std::string> Inputs) {
     if (InfoSection.empty())
       continue;
 
+    Expected<CompileUnitHeader> CompileUnitHeaderOrErr =
+        parseCompileUnitHeader(InfoSection);
+    if (!CompileUnitHeaderOrErr)
+      return CompileUnitHeaderOrErr.takeError();
+    CompileUnitHeader &CompileUnitHeader = *CompileUnitHeaderOrErr;
+
     writeStringsAndOffsets(Out, Strings, StrOffsetSection, CurStrSection,
-                           CurStrOffsetSection);
+                           CurStrOffsetSection, CompileUnitHeader);
 
     if (CurCUIndexSection.empty()) {
       Expected<CompileUnitIdentifiers> EID = getCUIdentifiers(


        


More information about the llvm-commits mailing list