[llvm] r339122 - [DebugInfo] Reduce debug_str_offsets section size

Pavel Labath via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 7 02:54:53 PDT 2018


Author: labath
Date: Tue Aug  7 02:54:52 2018
New Revision: 339122

URL: http://llvm.org/viewvc/llvm-project?rev=339122&view=rev
Log:
[DebugInfo] Reduce debug_str_offsets section size

Summary:
The accelerator tables use the debug_str section to store their strings.
However, they do not support the indirect method of access that is
available for the debug_info section (DW_FORM_strx et al.).

Currently our code is assuming that all strings can/will be referenced
indirectly, and puts all of them into the debug_str_offsets section.
This is generally true for regular (unsplit) dwarf, but in the DWO case,
most of the strings in the debug_str section will only be used from the
accelerator tables. Therefore the contents of the debug_str_offsets
section will be largely unused and will needlessly bloat the main executable.

This patch rectifies this by teaching the DwarfStringPool to
differentiate between strings accessed directly and indirectly. When a
user inserts a string into the pool it has to declare whether that
string will be referenced directly or not. If at least one user requests
indirect access, that string will be assigned an index ID and put into
the debug_str_offsets table. Otherwise, the offset table is skipped.

This approach reduces the overall binary size (when compiled with
-gdwarf-5 -gsplit-dwarf) in my tests by about 2% (debug_str_offsets is
shrunk by 99%).

Reviewers: probinson, dblaikie, JDevlieghere

Subscribers: aprantl, mgrang, llvm-commits

Differential Revision: https://reviews.llvm.org/D49493

Added:
    llvm/trunk/test/DebugInfo/X86/string-offsets-table-order.ll
Modified:
    llvm/trunk/include/llvm/CodeGen/DwarfStringPoolEntry.h
    llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
    llvm/trunk/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
    llvm/trunk/lib/CodeGen/AsmPrinter/DwarfStringPool.h
    llvm/trunk/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
    llvm/trunk/test/DebugInfo/X86/string-offsets-table.ll
    llvm/trunk/tools/dsymutil/DwarfStreamer.cpp
    llvm/trunk/tools/dsymutil/MachOUtils.cpp
    llvm/trunk/tools/dsymutil/NonRelocatableStringpool.cpp
    llvm/trunk/tools/dsymutil/NonRelocatableStringpool.h
    llvm/trunk/unittests/CodeGen/DIEHashTest.cpp
    llvm/trunk/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp
    llvm/trunk/unittests/DebugInfo/DWARF/DwarfGenerator.cpp

Modified: llvm/trunk/include/llvm/CodeGen/DwarfStringPoolEntry.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/DwarfStringPoolEntry.h?rev=339122&r1=339121&r2=339122&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/DwarfStringPoolEntry.h (original)
+++ llvm/trunk/include/llvm/CodeGen/DwarfStringPoolEntry.h Tue Aug  7 02:54:52 2018
@@ -10,6 +10,7 @@
 #ifndef LLVM_CODEGEN_DWARFSTRINGPOOLENTRY_H
 #define LLVM_CODEGEN_DWARFSTRINGPOOLENTRY_H
 
+#include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/StringMap.h"
 
 namespace llvm {
@@ -18,34 +19,52 @@ class MCSymbol;
 
 /// Data for a string pool entry.
 struct DwarfStringPoolEntry {
+  static constexpr unsigned NotIndexed = -1;
+
   MCSymbol *Symbol;
   unsigned Offset;
   unsigned Index;
+
+  bool isIndexed() const { return Index != NotIndexed; }
 };
 
 /// String pool entry reference.
-struct DwarfStringPoolEntryRef {
-  const StringMapEntry<DwarfStringPoolEntry> *I = nullptr;
+class DwarfStringPoolEntryRef {
+  PointerIntPair<const StringMapEntry<DwarfStringPoolEntry> *, 1, bool>
+      MapEntryAndIndexed;
+
+  const StringMapEntry<DwarfStringPoolEntry> *getMapEntry() const {
+    return MapEntryAndIndexed.getPointer();
+  }
 
 public:
   DwarfStringPoolEntryRef() = default;
-  explicit DwarfStringPoolEntryRef(
-      const StringMapEntry<DwarfStringPoolEntry> &I)
-      : I(&I) {}
+  DwarfStringPoolEntryRef(const StringMapEntry<DwarfStringPoolEntry> &Entry,
+                          bool Indexed)
+      : MapEntryAndIndexed(&Entry, Indexed) {}
 
-  explicit operator bool() const { return I; }
+  explicit operator bool() const { return getMapEntry(); }
   MCSymbol *getSymbol() const {
-    assert(I->second.Symbol && "No symbol available!");
-    return I->second.Symbol;
+    assert(getMapEntry()->second.Symbol && "No symbol available!");
+    return getMapEntry()->second.Symbol;
   }
-  unsigned getOffset() const { return I->second.Offset; }
-  unsigned getIndex() const { return I->second.Index; }
-  StringRef getString() const { return I->first(); }
+  unsigned getOffset() const { return getMapEntry()->second.Offset; }
+  bool isIndexed() const { return MapEntryAndIndexed.getInt(); }
+  unsigned getIndex() const {
+    assert(isIndexed());
+    assert(getMapEntry()->getValue().isIndexed());
+    return getMapEntry()->second.Index;
+  }
+  StringRef getString() const { return getMapEntry()->first(); }
   /// Return the entire string pool entry for convenience.
-  DwarfStringPoolEntry getEntry() const { return I->getValue(); }
+  DwarfStringPoolEntry getEntry() const { return getMapEntry()->getValue(); }
 
-  bool operator==(const DwarfStringPoolEntryRef &X) const { return I == X.I; }
-  bool operator!=(const DwarfStringPoolEntryRef &X) const { return I != X.I; }
+  bool operator==(const DwarfStringPoolEntryRef &X) const {
+    return getMapEntry() == X.getMapEntry();
+  }
+  bool operator!=(const DwarfStringPoolEntryRef &X) const {
+    return getMapEntry() != X.getMapEntry();
+  }
 };
 
 } // end namespace llvm

Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=339122&r1=339121&r2=339122&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original)
+++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Tue Aug  7 02:54:52 2018
@@ -2437,8 +2437,7 @@ void DwarfDebug::addAccelNameImpl(AccelT
     return;
 
   DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
-  DwarfStringPoolEntryRef Ref =
-      Holder.getStringPool().getEntry(*Asm, Name);
+  DwarfStringPoolEntryRef Ref = Holder.getStringPool().getEntry(*Asm, Name);
 
   switch (getAccelTableKind()) {
   case AccelTableKind::Apple:

Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp?rev=339122&r1=339121&r2=339122&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp (original)
+++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp Tue Aug  7 02:54:52 2018
@@ -24,25 +24,39 @@ DwarfStringPool::DwarfStringPool(BumpPtr
     : Pool(A), Prefix(Prefix),
       ShouldCreateSymbols(Asm.MAI->doesDwarfUseRelocationsAcrossSections()) {}
 
-DwarfStringPool::EntryRef DwarfStringPool::getEntry(AsmPrinter &Asm,
-                                                    StringRef Str) {
+StringMapEntry<DwarfStringPool::EntryTy> &
+DwarfStringPool::getEntryImpl(AsmPrinter &Asm, StringRef Str) {
   auto I = Pool.insert(std::make_pair(Str, EntryTy()));
+  auto &Entry = I.first->second;
   if (I.second) {
-    auto &Entry = I.first->second;
-    Entry.Index = Pool.size() - 1;
+    Entry.Index = EntryTy::NotIndexed;
     Entry.Offset = NumBytes;
     Entry.Symbol = ShouldCreateSymbols ? Asm.createTempSymbol(Prefix) : nullptr;
 
     NumBytes += Str.size() + 1;
     assert(NumBytes > Entry.Offset && "Unexpected overflow");
   }
-  return EntryRef(*I.first);
+  return *I.first;
+}
+
+DwarfStringPool::EntryRef DwarfStringPool::getEntry(AsmPrinter &Asm,
+                                                    StringRef Str) {
+  auto &MapEntry = getEntryImpl(Asm, Str);
+  return EntryRef(MapEntry, false);
+}
+
+DwarfStringPool::EntryRef DwarfStringPool::getIndexedEntry(AsmPrinter &Asm,
+                                                           StringRef Str) {
+  auto &MapEntry = getEntryImpl(Asm, Str);
+  if (!MapEntry.getValue().isIndexed())
+    MapEntry.getValue().Index = NumIndexedStrings++;
+  return EntryRef(MapEntry, true);
 }
 
 void DwarfStringPool::emitStringOffsetsTableHeader(AsmPrinter &Asm,
                                                    MCSection *Section,
                                                    MCSymbol *StartSym) {
-  if (empty())
+  if (getNumIndexedStrings() == 0)
     return;
   Asm.OutStreamer->SwitchSection(Section);
   unsigned EntrySize = 4;
@@ -51,7 +65,7 @@ void DwarfStringPool::emitStringOffsetsT
   // table. The header consists of an entry with the contribution's
   // size (not including the size of the length field), the DWARF version and
   // 2 bytes of padding.
-  Asm.emitInt32(size() * EntrySize + 4);
+  Asm.emitInt32(getNumIndexedStrings() * EntrySize + 4);
   Asm.emitInt16(Asm.getDwarfVersion());
   Asm.emitInt16(0);
   // Define the symbol that marks the start of the contribution. It is
@@ -69,12 +83,18 @@ void DwarfStringPool::emit(AsmPrinter &A
   // Start the dwarf str section.
   Asm.OutStreamer->SwitchSection(StrSection);
 
-  // Get all of the string pool entries and put them in an array by their ID so
-  // we can sort them.
-  SmallVector<const StringMapEntry<EntryTy> *, 64> Entries(Pool.size());
+  // Get all of the string pool entries and sort them by their offset.
+  SmallVector<const StringMapEntry<EntryTy> *, 64> Entries;
+  Entries.reserve(Pool.size());
 
   for (const auto &E : Pool)
-    Entries[E.getValue().Index] = &E;
+    Entries.push_back(&E);
+
+  llvm::sort(
+      Entries.begin(), Entries.end(),
+      [](const StringMapEntry<EntryTy> *A, const StringMapEntry<EntryTy> *B) {
+        return A->getValue().Offset < B->getValue().Offset;
+      });
 
   for (const auto &Entry : Entries) {
     assert(ShouldCreateSymbols == static_cast<bool>(Entry->getValue().Symbol) &&
@@ -93,6 +113,14 @@ void DwarfStringPool::emit(AsmPrinter &A
 
   // If we've got an offset section go ahead and emit that now as well.
   if (OffsetSection) {
+    // Now only take the indexed entries and put them in an array by their ID so
+    // we can emit them in order.
+    Entries.resize(NumIndexedStrings);
+    for (const auto &Entry : Pool) {
+      if (Entry.getValue().isIndexed())
+        Entries[Entry.getValue().Index] = &Entry;
+    }
+
     Asm.OutStreamer->SwitchSection(OffsetSection);
     unsigned size = 4; // FIXME: DWARF64 is 8.
     for (const auto &Entry : Entries)

Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfStringPool.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfStringPool.h?rev=339122&r1=339121&r2=339122&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfStringPool.h (original)
+++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfStringPool.h Tue Aug  7 02:54:52 2018
@@ -30,8 +30,11 @@ class DwarfStringPool {
   StringMap<EntryTy, BumpPtrAllocator &> Pool;
   StringRef Prefix;
   unsigned NumBytes = 0;
+  unsigned NumIndexedStrings = 0;
   bool ShouldCreateSymbols;
 
+  StringMapEntry<EntryTy> &getEntryImpl(AsmPrinter &Asm, StringRef Str);
+
 public:
   using EntryRef = DwarfStringPoolEntryRef;
 
@@ -48,8 +51,15 @@ public:
 
   unsigned size() const { return Pool.size(); }
 
+  unsigned getNumIndexedStrings() const { return NumIndexedStrings; }
+
   /// Get a reference to an entry in the string pool.
   EntryRef getEntry(AsmPrinter &Asm, StringRef Str);
+
+  /// Same as getEntry, except that you can use EntryRef::getIndex to obtain a
+  /// unique ID of this entry (e.g., for use in indexed forms like
+  /// DW_FORM_strx).
+  EntryRef getIndexedEntry(AsmPrinter &Asm, StringRef Str);
 };
 
 } // end namespace llvm

Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfUnit.cpp?rev=339122&r1=339121&r2=339122&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfUnit.cpp (original)
+++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfUnit.cpp Tue Aug  7 02:54:52 2018
@@ -243,9 +243,14 @@ void DwarfUnit::addString(DIE &Die, dwar
                      DIEInlineString(String, DIEValueAllocator));
     return;
   }
-  auto StringPoolEntry = DU->getStringPool().getEntry(*Asm, String);
   dwarf::Form IxForm =
       isDwoUnit() ? dwarf::DW_FORM_GNU_str_index : dwarf::DW_FORM_strp;
+
+  auto StringPoolEntry =
+      useSegmentedStringOffsetsTable() || IxForm == dwarf::DW_FORM_GNU_str_index
+          ? DU->getStringPool().getIndexedEntry(*Asm, String)
+          : DU->getStringPool().getEntry(*Asm, String);
+
   // For DWARF v5 and beyond, use the smallest strx? form possible.
   if (useSegmentedStringOffsetsTable()) {
     IxForm = dwarf::DW_FORM_strx1;

Added: llvm/trunk/test/DebugInfo/X86/string-offsets-table-order.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/DebugInfo/X86/string-offsets-table-order.ll?rev=339122&view=auto
==============================================================================
--- llvm/trunk/test/DebugInfo/X86/string-offsets-table-order.ll (added)
+++ llvm/trunk/test/DebugInfo/X86/string-offsets-table-order.ll Tue Aug  7 02:54:52 2018
@@ -0,0 +1,79 @@
+; REQUIRES: object-emission
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -split-dwarf-file=foo.dwo -filetype=obj < %s \
+; RUN:   | llvm-dwarfdump -v - | FileCheck %s
+
+; This triggers a situation where the order of entries in the .debug_str and
+; .debug_str_offsets sections does not match and makes sure that all entries are
+; still wired up correctly.
+
+; Produced with "clang -S -emit-llvm -gdwarf-5" from source "int X;", copied
+; three times and modified by hand.
+
+; CHECK: .debug_info contents:
+; CHECK:   DW_TAG_compile_unit
+; CHECK:     DW_AT_comp_dir [DW_FORM_strx1] ( indexed (00000001) string = "X3")
+; CHECK:   DW_TAG_compile_unit
+; CHECK:     DW_AT_comp_dir [DW_FORM_strx1] ( indexed (00000002) string = "X2")
+; CHECK:   DW_TAG_compile_unit
+; CHECK:     DW_AT_comp_dir [DW_FORM_strx1] ( indexed (00000003) string = "X1")
+; CHECK: .debug_info.dwo contents:
+
+; CHECK: .debug_str contents:
+; CHECK: 0x[[X3:[0-9a-f]*]]: "X3"
+; CHECK: 0x[[X1:[0-9a-f]*]]: "X1"
+; CHECK: 0x[[X2:[0-9a-f]*]]: "X2"
+
+; CHECK: .debug_str_offsets contents:
+; CHECK: Format = DWARF32, Version = 5
+; CHECK-NEXT: 00000000 "foo.dwo"
+; CHECK-NEXT: [[X3]] "X3"
+; CHECK-NEXT: [[X2]] "X2"
+; CHECK-NEXT: [[X1]] "X1"
+; CHECK-EMPTY:
+
+
+
+!llvm.dbg.cu = !{!10, !20, !30}
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 2, !"Dwarf Version", i32 5}
+!1 = !{i32 2, !"Debug Info Version", i32 3}
+!2 = !{i32 1, !"wchar_size", i32 4}
+!3 = !{!"clang version 7.0.0 (trunk 337353) (llvm/trunk 337361)"}
+
+
+@X1 = dso_local global i32 0, align 4, !dbg !11
+
+!10 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !13, producer: "clang version 7.0.0 (trunk 337353) (llvm/trunk 337361)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !14, globals: !15)
+!11 = !DIGlobalVariableExpression(var: !12, expr: !DIExpression())
+!12 = distinct !DIGlobalVariable(name: "X1", scope: !10, file: !16, line: 1, type: !17, isLocal: false, isDefinition: true)
+!13 = !DIFile(filename: "-", directory: "X3", checksumkind: CSK_MD5, checksum: "f2e6e10e303927a308f1645fbf6f710e")
+!14 = !{}
+!15 = !{!11}
+!16 = !DIFile(filename: "<stdin>", directory: "X3", checksumkind: CSK_MD5, checksum: "f2e6e10e303927a308f1645fbf6f710e")
+!17 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+
+
+@X2 = dso_local global i32 0, align 4, !dbg !21
+
+!20 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !23, producer: "clang version 7.0.0 (trunk 337353) (llvm/trunk 337361)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !24, globals: !25)
+!21 = !DIGlobalVariableExpression(var: !22, expr: !DIExpression())
+!22 = distinct !DIGlobalVariable(name: "X2", scope: !20, file: !26, line: 1, type: !27, isLocal: false, isDefinition: true)
+!23 = !DIFile(filename: "-", directory: "X2", checksumkind: CSK_MD5, checksum: "f2e6e10e303927a308f1645fbf6f710e")
+!24 = !{}
+!25 = !{!21}
+!26 = !DIFile(filename: "<stdin>", directory: "X2", checksumkind: CSK_MD5, checksum: "f2e6e10e303927a308f1645fbf6f710e")
+!27 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+
+
+@X3 = dso_local global i32 0, align 4, !dbg !31
+
+!30 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !33, producer: "clang version 7.0.0 (trunk 337353) (llvm/trunk 337361)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !34, globals: !35)
+!31 = !DIGlobalVariableExpression(var: !32, expr: !DIExpression())
+!32 = distinct !DIGlobalVariable(name: "X3", scope: !30, file: !36, line: 1, type: !37, isLocal: false, isDefinition: true)
+!33 = !DIFile(filename: "-", directory: "X1", checksumkind: CSK_MD5, checksum: "f2e6e10e303927a308f1645fbf6f710e")
+!34 = !{}
+!35 = !{!31}
+!36 = !DIFile(filename: "<stdin>", directory: "X1", checksumkind: CSK_MD5, checksum: "f2e6e10e303927a308f1645fbf6f710e")
+!37 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)

Modified: llvm/trunk/test/DebugInfo/X86/string-offsets-table.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/DebugInfo/X86/string-offsets-table.ll?rev=339122&r1=339121&r2=339122&view=diff
==============================================================================
--- llvm/trunk/test/DebugInfo/X86/string-offsets-table.ll (original)
+++ llvm/trunk/test/DebugInfo/X86/string-offsets-table.ll Tue Aug  7 02:54:52 2018
@@ -1,7 +1,7 @@
 ; REQUIRES: object-emission
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -filetype=obj < %s | llvm-dwarfdump -v - \
 ; RUN:   | FileCheck --check-prefix=MONOLITHIC %s
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -split-dwarf-file=%t.dwo -filetype=obj < %s \
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -split-dwarf-file=foo.dwo -filetype=obj < %s \
 ; RUN:   | llvm-dwarfdump -v - | FileCheck --check-prefix=SPLIT %s
 
 ; This basic test checks the emission of a DWARF v5 string offsets table in
@@ -59,6 +59,8 @@
 ; SPLIT:      DW_TAG_compile_unit
 ; SPLIT-NOT:  {{DW_TAG|contents:}}
 ; SPLIT:      DW_AT_str_offsets_base [DW_FORM_sec_offset] (0x00000008)
+; SPLIT:      DW_AT_GNU_dwo_name [DW_FORM_strx1] ( indexed (00000000) string = "foo.dwo")
+; SPLIT:      DW_AT_comp_dir [DW_FORM_strx1] ( indexed (00000001) string = "/home/test")
 
 ; Check for the split CU in .debug_info.dwo.
 ; SPLIT:      .debug_info.dwo contents:
@@ -79,10 +81,10 @@
 ;
 ; Extract the string offsets referenced in the main file by the skeleton unit.
 ; SPLIT:      .debug_str contents:
-; SPLIT-NEXT: 0x00000000:{{.*}}
-; SPLIT-NEXT: 0x[[STRING2SPLIT:[0-9a-f]*]]{{.*}}
-; SPLIT-NEXT: 0x[[STRING3SPLIT:[0-9a-f]*]]{{.*}}
-; SPLIT-NEXT: 0x[[STRING4SPLIT:[0-9a-f]*]]{{.*}}
+; SPLIT-NEXT: 0x00000000: "foo.dwo"
+; SPLIT-NEXT: 0x[[STRING2SPLIT:[0-9a-f]*]]: "/home/test"
+; SPLIT-NEXT: 0x[[STRING3SPLIT:[0-9a-f]*]]: "E"
+; SPLIT-NEXT: 0x[[STRING4SPLIT:[0-9a-f]*]]: "glob"
 ;
 ; Extract the string offsets referenced in the .dwo file by the split unit.
 ; SPLIT:      .debug_str.dwo contents:
@@ -91,13 +93,15 @@
 ; SPLIT-NEXT: 0x[[STRING3DWO:[0-9a-f]*]]{{.*}}
 ;
 ; Check the string offsets sections in both the main and the .dwo files and
-; verify that the extracted string offsets are referenced correctly.
+; verify that the extracted string offsets are referenced correctly. The
+; sections should contain only the offsets of strings that are actually
+; referenced by the debug info.
 ; SPLIT:      .debug_str_offsets contents:
-; SPLIT-NEXT: 0x00000000: Contribution size = 20, Format = DWARF32, Version = 5
-; SPLIT-NEXT: 0x00000008: 00000000{{.*}}
-; SPLIT-NEXT: 0x0000000c: [[STRING2SPLIT]]
-; SPLIT-NEXT: 0x00000010: [[STRING3SPLIT]]
-; SPLIT-NEXT: 0x00000014: [[STRING4SPLIT]]
+; SPLIT-NEXT: 0x00000000: Contribution size = 12, Format = DWARF32, Version = 5
+; SPLIT-NEXT: 0x00000008: 00000000 "foo.dwo"
+; SPLIT-NEXT: 0x0000000c: [[STRING2SPLIT]] "/home/test"
+; SPLIT-EMPTY:
+
 ; SPLIT:      .debug_str_offsets.dwo contents:
 ; SPLIT-NEXT: 0x00000000: Contribution size = 36, Format = DWARF32, Version = 5
 ; SPLIT-NEXT: 0x00000008: 00000000{{.*}}

Modified: llvm/trunk/tools/dsymutil/DwarfStreamer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/dsymutil/DwarfStreamer.cpp?rev=339122&r1=339121&r2=339122&view=diff
==============================================================================
--- llvm/trunk/tools/dsymutil/DwarfStreamer.cpp (original)
+++ llvm/trunk/tools/dsymutil/DwarfStreamer.cpp Tue Aug  7 02:54:52 2018
@@ -190,10 +190,8 @@ void DwarfStreamer::emitDIE(DIE &Die) {
 /// Emit the debug_str section stored in \p Pool.
 void DwarfStreamer::emitStrings(const NonRelocatableStringpool &Pool) {
   Asm->OutStreamer->SwitchSection(MOFI->getDwarfStrSection());
-  std::vector<DwarfStringPoolEntryRef> Entries = Pool.getEntries();
+  std::vector<DwarfStringPoolEntryRef> Entries = Pool.getEntriesForEmission();
   for (auto Entry : Entries) {
-    if (Entry.getIndex() == -1U)
-      break;
     // Emit the string itself.
     Asm->OutStreamer->EmitBytes(Entry.getString());
     // Emit a null terminator.

Modified: llvm/trunk/tools/dsymutil/MachOUtils.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/dsymutil/MachOUtils.cpp?rev=339122&r1=339121&r2=339122&view=diff
==============================================================================
--- llvm/trunk/tools/dsymutil/MachOUtils.cpp (original)
+++ llvm/trunk/tools/dsymutil/MachOUtils.cpp Tue Aug  7 02:54:52 2018
@@ -514,10 +514,9 @@ bool generateDsymCompanion(const DebugMa
     // Reproduce that behavior for now (there is corresponding code in
     // transferSymbol).
     OutFile << '\0';
-    std::vector<DwarfStringPoolEntryRef> Strings = NewStrings.getEntries();
+    std::vector<DwarfStringPoolEntryRef> Strings =
+        NewStrings.getEntriesForEmission();
     for (auto EntryRef : Strings) {
-      if (EntryRef.getIndex() == -1U)
-        break;
       OutFile.write(EntryRef.getString().data(),
                     EntryRef.getString().size() + 1);
     }

Modified: llvm/trunk/tools/dsymutil/NonRelocatableStringpool.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/dsymutil/NonRelocatableStringpool.cpp?rev=339122&r1=339121&r2=339122&view=diff
==============================================================================
--- llvm/trunk/tools/dsymutil/NonRelocatableStringpool.cpp (original)
+++ llvm/trunk/tools/dsymutil/NonRelocatableStringpool.cpp Tue Aug  7 02:54:52 2018
@@ -18,27 +18,28 @@ DwarfStringPoolEntryRef NonRelocatableSt
 
   auto I = Strings.insert({S, DwarfStringPoolEntry()});
   auto &Entry = I.first->second;
-  if (I.second || Entry.Index == -1U) {
+  if (I.second || !Entry.isIndexed()) {
     Entry.Index = NumEntries++;
     Entry.Offset = CurrentEndOffset;
     Entry.Symbol = nullptr;
     CurrentEndOffset += S.size() + 1;
   }
-  return DwarfStringPoolEntryRef(*I.first);
+  return DwarfStringPoolEntryRef(*I.first, true);
 }
 
 StringRef NonRelocatableStringpool::internString(StringRef S) {
-  DwarfStringPoolEntry Entry{nullptr, 0, -1U};
+  DwarfStringPoolEntry Entry{nullptr, 0, DwarfStringPoolEntry::NotIndexed};
   auto InsertResult = Strings.insert({S, Entry});
   return InsertResult.first->getKey();
 }
 
 std::vector<DwarfStringPoolEntryRef>
-NonRelocatableStringpool::getEntries() const {
+NonRelocatableStringpool::getEntriesForEmission() const {
   std::vector<DwarfStringPoolEntryRef> Result;
   Result.reserve(Strings.size());
   for (const auto &E : Strings)
-    Result.emplace_back(E);
+    if (E.getValue().isIndexed())
+      Result.emplace_back(E, true);
   llvm::sort(
       Result.begin(), Result.end(),
       [](const DwarfStringPoolEntryRef A, const DwarfStringPoolEntryRef B) {

Modified: llvm/trunk/tools/dsymutil/NonRelocatableStringpool.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/dsymutil/NonRelocatableStringpool.h?rev=339122&r1=339121&r2=339122&view=diff
==============================================================================
--- llvm/trunk/tools/dsymutil/NonRelocatableStringpool.h (original)
+++ llvm/trunk/tools/dsymutil/NonRelocatableStringpool.h Tue Aug  7 02:54:52 2018
@@ -53,7 +53,9 @@ public:
 
   uint64_t getSize() { return CurrentEndOffset; }
 
-  std::vector<DwarfStringPoolEntryRef> getEntries() const;
+  /// Return the list of strings to be emitted. This does not contain the
+  /// strings which were added via internString only.
+  std::vector<DwarfStringPoolEntryRef> getEntriesForEmission() const;
 
 private:
   MapTy Strings;

Modified: llvm/trunk/unittests/CodeGen/DIEHashTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/CodeGen/DIEHashTest.cpp?rev=339122&r1=339121&r2=339122&view=diff
==============================================================================
--- llvm/trunk/unittests/CodeGen/DIEHashTest.cpp (original)
+++ llvm/trunk/unittests/CodeGen/DIEHashTest.cpp Tue Aug  7 02:54:52 2018
@@ -31,8 +31,8 @@ private:
 public:
   DIEString getString(StringRef S) {
     DwarfStringPoolEntry Entry = {nullptr, 1, 1};
-    return DIEString(
-        DwarfStringPoolEntryRef(*Pool.insert(std::make_pair(S, Entry)).first));
+    return DIEString(DwarfStringPoolEntryRef(
+        *Pool.insert(std::make_pair(S, Entry)).first, Entry.isIndexed()));
   }
 };
 

Modified: llvm/trunk/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp?rev=339122&r1=339121&r2=339122&view=diff
==============================================================================
--- llvm/trunk/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp (original)
+++ llvm/trunk/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp Tue Aug  7 02:54:52 2018
@@ -1007,6 +1007,99 @@ TEST(DWARFDebugInfo, TestDWARF32Version4
   TestAddresses<4, AddrType>();
 }
 
+TEST(DWARFDebugInfo, TestStringOffsets) {
+  Triple Triple = getHostTripleForAddrSize(sizeof(void *));
+  if (!isConfigurationSupported(Triple))
+    return;
+
+  const char *String1 = "Hello";
+  const char *String2 = "World";
+
+  auto ExpectedDG = dwarfgen::Generator::create(Triple, 5);
+  ASSERT_THAT_EXPECTED(ExpectedDG, Succeeded());
+  dwarfgen::Generator *DG = ExpectedDG.get().get();
+  dwarfgen::CompileUnit &CU = DG->addCompileUnit();
+  dwarfgen::DIE CUDie = CU.getUnitDIE();
+
+  CUDie.addStrOffsetsBaseAttribute();
+
+  uint16_t Attr = DW_AT_lo_user;
+
+  // Create our strings. First we create a non-indexed reference to String1,
+  // followed by an indexed String2. Finally, we add an indexed reference to
+  // String1.
+  const auto Attr1 = static_cast<dwarf::Attribute>(Attr++);
+  CUDie.addAttribute(Attr1, DW_FORM_strp, String1);
+
+  const auto Attr2 = static_cast<dwarf::Attribute>(Attr++);
+  CUDie.addAttribute(Attr2, DW_FORM_strx, String2);
+
+  const auto Attr3 = static_cast<dwarf::Attribute>(Attr++);
+  CUDie.addAttribute(Attr3, DW_FORM_strx, String1);
+
+  // Generate the DWARF
+  StringRef FileBytes = DG->generate();
+  MemoryBufferRef FileBuffer(FileBytes, "dwarf");
+  auto Obj = object::ObjectFile::createObjectFile(FileBuffer);
+  ASSERT_TRUE((bool)Obj);
+  std::unique_ptr<DWARFContext> DwarfContext = DWARFContext::create(**Obj);
+  uint32_t NumCUs = DwarfContext->getNumCompileUnits();
+  ASSERT_EQ(NumCUs, 1u);
+  DWARFUnit *U = DwarfContext->getUnitAtIndex(0);
+  auto DieDG = U->getUnitDIE(false);
+  ASSERT_TRUE(DieDG.isValid());
+
+  // Now make sure the string offsets came out properly. Attr2 should have index
+  // 0 (because it was the first indexed string) even though the string itself
+  // was added earlier.
+  auto Extracted1 = toString(DieDG.find(Attr1));
+  ASSERT_TRUE((bool)Extracted1);
+  EXPECT_STREQ(String1, *Extracted1);
+
+  Optional<DWARFFormValue> Form2 = DieDG.find(Attr2);
+  ASSERT_TRUE((bool)Form2);
+  EXPECT_EQ(0u, Form2->getRawUValue());
+  auto Extracted2 = toString(Form2);
+  ASSERT_TRUE((bool)Extracted2);
+  EXPECT_STREQ(String2, *Extracted2);
+
+  Optional<DWARFFormValue> Form3 = DieDG.find(Attr3);
+  ASSERT_TRUE((bool)Form3);
+  EXPECT_EQ(1u, Form3->getRawUValue());
+  auto Extracted3 = toString(Form3);
+  ASSERT_TRUE((bool)Extracted3);
+  EXPECT_STREQ(String1, *Extracted3);
+}
+
+TEST(DWARFDebugInfo, TestEmptyStringOffsets) {
+  Triple Triple = getHostTripleForAddrSize(sizeof(void *));
+  if (!isConfigurationSupported(Triple))
+    return;
+
+  const char *String1 = "Hello";
+
+  auto ExpectedDG = dwarfgen::Generator::create(Triple, 5);
+  ASSERT_THAT_EXPECTED(ExpectedDG, Succeeded());
+  dwarfgen::Generator *DG = ExpectedDG.get().get();
+  dwarfgen::CompileUnit &CU = DG->addCompileUnit();
+  dwarfgen::DIE CUDie = CU.getUnitDIE();
+
+  uint16_t Attr = DW_AT_lo_user;
+
+  // We shall insert only one string. It will be referenced directly.
+  const auto Attr1 = static_cast<dwarf::Attribute>(Attr++);
+  CUDie.addAttribute(Attr1, DW_FORM_strp, String1);
+
+  // Generate the DWARF
+  StringRef FileBytes = DG->generate();
+  MemoryBufferRef FileBuffer(FileBytes, "dwarf");
+  auto Obj = object::ObjectFile::createObjectFile(FileBuffer);
+  ASSERT_TRUE((bool)Obj);
+  std::unique_ptr<DWARFContext> DwarfContext = DWARFContext::create(**Obj);
+  EXPECT_TRUE(
+      DwarfContext->getDWARFObj().getStringOffsetSection().Data.empty());
+}
+
 TEST(DWARFDebugInfo, TestRelations) {
   Triple Triple = getHostTripleForAddrSize(sizeof(void *));
   if (!isConfigurationSupported(Triple))

Modified: llvm/trunk/unittests/DebugInfo/DWARF/DwarfGenerator.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/DebugInfo/DWARF/DwarfGenerator.cpp?rev=339122&r1=339121&r2=339122&view=diff
==============================================================================
--- llvm/trunk/unittests/DebugInfo/DWARF/DwarfGenerator.cpp (original)
+++ llvm/trunk/unittests/DebugInfo/DWARF/DwarfGenerator.cpp Tue Aug  7 02:54:52 2018
@@ -71,15 +71,20 @@ void dwarfgen::DIE::addAttribute(uint16_
     break;
 
   case DW_FORM_strp:
+    Die->addValue(
+        DG.getAllocator(), static_cast<dwarf::Attribute>(A), Form,
+        DIEString(DG.getStringPool().getEntry(*DG.getAsmPrinter(), String)));
+    break;
+
   case DW_FORM_GNU_str_index:
   case DW_FORM_strx:
   case DW_FORM_strx1:
   case DW_FORM_strx2:
   case DW_FORM_strx3:
   case DW_FORM_strx4:
-    Die->addValue(
-        DG.getAllocator(), static_cast<dwarf::Attribute>(A), Form,
-        DIEString(DG.getStringPool().getEntry(*DG.getAsmPrinter(), String)));
+    Die->addValue(DG.getAllocator(), static_cast<dwarf::Attribute>(A), Form,
+                  DIEString(DG.getStringPool().getIndexedEntry(
+                      *DG.getAsmPrinter(), String)));
     break;
 
   default:




More information about the llvm-commits mailing list