[llvm] r198397 - Debug Info: Type Units: Simplify type hashing using IR-provided unique names.

David Blaikie dblaikie at gmail.com
Thu Jan 2 20:20:26 PST 2014


Author: dblaikie
Date: Thu Jan  2 22:20:26 2014
New Revision: 198397

URL: http://llvm.org/viewvc/llvm-project?rev=198397&view=rev
Log:
Debug Info: Type Units: Simplify type hashing using IR-provided unique names.

What's good for LTO metadata size problems ought to be good for non-LTO
debug info size too, so let's rely on the same uniqueness in both cases.
If it's insufficient for non-LTO for whatever reason (we now won't be
uniquing CU-local types or any C types, though these are unlikely to be
the most significant contributors to type bloat), we should consider a
frontend solution that'll help both LTO and non-LTO alike, rather than
using DWARF-level DIE hashing that only helps non-LTO debug info size.

It's also much simpler this way, and it benefits C++ even more: lexically
separate definitions of the same C++ type have the same mangled name, so
we can now deduplicate them.

Modified:
    llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
    llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h
    llvm/trunk/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
    llvm/trunk/test/DebugInfo/X86/c-type-units.ll
    llvm/trunk/test/DebugInfo/X86/generate-odr-hash.ll

Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=198397&r1=198396&r2=198397&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original)
+++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Thu Jan  2 22:20:26 2014
@@ -58,11 +58,6 @@ static cl::opt<bool> UnknownLocations(
     cl::desc("Make an absence of debug location information explicit."),
     cl::init(false));
 
-static cl::opt<bool>
-GenerateODRHash("generate-odr-hash", cl::Hidden,
-                cl::desc("Add an ODR hash to external type DIEs."),
-                cl::init(false));
-
 static cl::opt<bool> GenerateCUHash("generate-cu-hash", cl::Hidden,
                                     cl::desc("Add the CU hash as the dwo_id."),
                                     cl::init(false));
@@ -1019,41 +1014,6 @@ void DwarfDebug::collectDeadVariables()
   }
 }
 
-// Type Signature [7.27] and ODR Hash code.
-
-/// \brief Grabs the string in whichever attribute is passed in and returns
-/// a reference to it. Returns "" if the attribute doesn't exist.
-static StringRef getDIEStringAttr(DIE *Die, unsigned Attr) {
-  DIEValue *V = Die->findAttribute(Attr);
-
-  if (DIEString *S = dyn_cast_or_null<DIEString>(V))
-    return S->getString();
-
-  return StringRef("");
-}
-
-/// Return true if the current DIE is contained within an anonymous namespace.
-static bool isContainedInAnonNamespace(DIE *Die) {
-  DIE *Parent = Die->getParent();
-
-  while (Parent) {
-    if (Parent->getTag() == dwarf::DW_TAG_namespace &&
-        getDIEStringAttr(Parent, dwarf::DW_AT_name) == "")
-      return true;
-    Parent = Parent->getParent();
-  }
-
-  return false;
-}
-
-/// Test if the current CU language is C++ and that we have
-/// a named type that is not contained in an anonymous namespace.
-static bool shouldAddODRHash(DwarfTypeUnit *CU, DIE *Die) {
-  return CU->getLanguage() == dwarf::DW_LANG_C_plus_plus &&
-         getDIEStringAttr(Die, dwarf::DW_AT_name) != "" &&
-         !isContainedInAnonNamespace(Die);
-}
-
 void DwarfDebug::finalizeModuleInfo() {
   // Collect info for variables that were optimized out.
   collectDeadVariables();
@@ -3041,8 +3001,8 @@ void DwarfDebug::emitDebugStrDWO() {
                          OffSec, StrSym);
 }
 
-void DwarfDebug::addDwarfTypeUnitType(uint16_t Language, DIE *RefDie,
-                                      DICompositeType CTy) {
+void DwarfDebug::addDwarfTypeUnitType(uint16_t Language, StringRef Identifier,
+                                      DIE *RefDie, DICompositeType CTy) {
   const DwarfTypeUnit *&TU = DwarfTypeUnits[CTy];
   if (!TU) {
     DIE *UnitDie = new DIE(dwarf::DW_TAG_type_unit);
@@ -3057,16 +3017,14 @@ void DwarfDebug::addDwarfTypeUnitType(ui
 
     DIE *Die = NewTU->createTypeDIE(CTy);
 
-    if (GenerateODRHash && shouldAddODRHash(NewTU, Die))
-      NewTU->addUInt(UnitDie, dwarf::DW_AT_GNU_odr_signature,
-                     dwarf::DW_FORM_data8,
-                     DIEHash().computeDIEODRSignature(*Die));
-    // FIXME: This won't handle circularly referential structures, as the DIE
-    // may have references to other DIEs still under construction and missing
-    // their signature. Hashing should walk through the signatures to their
-    // referenced type, or possibly walk the precomputed hashes of related types
-    // at the end.
-    uint64_t Signature = DIEHash().computeTypeSignature(*Die);
+    MD5 Hash;
+    Hash.update(Identifier);
+    // ... take the least significant 8 bytes and return those. Our MD5
+    // implementation always returns its results in little endian, swap bytes
+    // appropriately.
+    MD5::MD5Result Result;
+    Hash.final(Result);
+    uint64_t Signature = *reinterpret_cast<uint64_t *>(Result + 8);
     NewTU->setTypeSignature(Signature);
     NewTU->setType(Die);
 

Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h?rev=198397&r1=198396&r2=198397&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h (original)
+++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Thu Jan  2 22:20:26 2014
@@ -695,7 +695,8 @@ public:
 
   /// \brief Add a DIE to the set of types that we're going to pull into
   /// type units.
-  void addDwarfTypeUnitType(uint16_t Language, DIE *Die, DICompositeType CTy);
+  void addDwarfTypeUnitType(uint16_t Language, StringRef Identifier, DIE *Die,
+                            DICompositeType CTy);
 
   /// \brief Add a label so that arange data can be generated for it.
   void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); }

Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfUnit.cpp?rev=198397&r1=198396&r2=198397&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfUnit.cpp (original)
+++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfUnit.cpp Thu Jan  2 22:20:26 2014
@@ -928,41 +928,6 @@ DIE *DwarfUnit::createTypeDIE(DIComposit
   return TyDIE;
 }
 
-/// Return true if the type is appropriately scoped to be contained inside
-/// its own type unit.
-static bool isDwarfTypeUnitScoped(DIType Ty, const DwarfDebug *DD) {
-  DIScope Parent = DD->resolve(Ty.getContext());
-  while (Parent) {
-    // Don't generate a hash for anything scoped inside a function.
-    if (Parent.isSubprogram())
-      return false;
-    Parent = DD->resolve(Parent.getContext());
-  }
-  return true;
-}
-
-/// Return true if the type should be split out into a type unit.
-static bool shouldCreateDwarfTypeUnit(DICompositeType CTy,
-                                      const DwarfDebug *DD) {
-  if (!GenerateDwarfTypeUnits)
-    return false;
-
-  uint16_t Tag = CTy.getTag();
-
-  switch (Tag) {
-  case dwarf::DW_TAG_structure_type:
-  case dwarf::DW_TAG_union_type:
-  case dwarf::DW_TAG_enumeration_type:
-  case dwarf::DW_TAG_class_type:
-    // If this is a class, structure, union, or enumeration type
-    // that is a definition (not a declaration), and not scoped
-    // inside a function then separate this out as a type unit.
-    return !CTy.isForwardDecl() && isDwarfTypeUnitScoped(CTy, DD);
-  default:
-    return false;
-  }
-}
-
 /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
 /// given DIType.
 DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
@@ -989,11 +954,13 @@ DIE *DwarfUnit::getOrCreateTypeDIE(const
     constructTypeDIE(*TyDIE, DIBasicType(Ty));
   else if (Ty.isCompositeType()) {
     DICompositeType CTy(Ty);
-    if (shouldCreateDwarfTypeUnit(CTy, DD)) {
-      DD->addDwarfTypeUnitType(getLanguage(), TyDIE, CTy);
-      // Skip updating the accellerator tables since this is not the full type
-      return TyDIE;
-    }
+    if (GenerateDwarfTypeUnits && !Ty.isForwardDecl())
+      if (MDString *TypeId = CTy.getIdentifier()) {
+        DD->addDwarfTypeUnitType(getLanguage(), TypeId->getString(), TyDIE,
+                                 CTy);
+        // Skip updating the accellerator tables since this is not the full type
+        return TyDIE;
+      }
     constructTypeDIE(*TyDIE, CTy);
   } else {
     assert(Ty.isDerivedType() && "Unknown kind of DIType");

Modified: llvm/trunk/test/DebugInfo/X86/c-type-units.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/DebugInfo/X86/c-type-units.ll?rev=198397&r1=198396&r2=198397&view=diff
==============================================================================
--- llvm/trunk/test/DebugInfo/X86/c-type-units.ll (original)
+++ llvm/trunk/test/DebugInfo/X86/c-type-units.ll Thu Jan  2 22:20:26 2014
@@ -5,8 +5,9 @@
 ; struct foo {
 ; } f;
 
-; CHECK: DW_TAG_type_unit
-; CHECK-NEXT: DW_AT_language [DW_FORM_data2]    (0x000c)
+; no known LLVM frontends produce appropriate unique identifiers for C types, 
+; so we don't produce type units for them
+; CHECK-NOT: DW_TAG_type_unit
 
 %struct.foo = type {}
 

Modified: llvm/trunk/test/DebugInfo/X86/generate-odr-hash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/DebugInfo/X86/generate-odr-hash.ll?rev=198397&r1=198396&r2=198397&view=diff
==============================================================================
--- llvm/trunk/test/DebugInfo/X86/generate-odr-hash.ll (original)
+++ llvm/trunk/test/DebugInfo/X86/generate-odr-hash.ll Thu Jan  2 22:20:26 2014
@@ -1,6 +1,6 @@
 ; REQUIRES: object-emission
 
-; RUN: llc %s -o %t -filetype=obj -O0 -generate-type-units -generate-odr-hash -mtriple=x86_64-unknown-linux-gnu
+; RUN: llc %s -o %t -filetype=obj -O0 -generate-type-units -mtriple=x86_64-unknown-linux-gnu
 ; RUN: llvm-dwarfdump %t | FileCheck %s
 
 ; Generated from:
@@ -46,18 +46,31 @@
 ; CHECK-LABEL: .debug_info contents:
 ; CHECK: Compile Unit: length = [[CU_SIZE:[0-9a-f]+]]
 
+; CHECK: DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_signature
+; CHECK: DW_TAG_class_type
+; CHECK-NEXT: DW_AT_signature
+
+; Ensure the CU-local type 'walrus' is not placed in a type unit.
+; CHECK: DW_TAG_structure_type
+; CHECK-NEXT: debug_str{{.*}}"walrus"
+; CHECK-NEXT: DW_AT_byte_size
+; CHECK-NEXT: DW_AT_decl_file
+; CHECK-NEXT: DW_AT_decl_line
+
 ; CHECK-LABEL: .debug_types contents:
 
 ; Check that we generate a hash for bar and the value.
-; CHECK-LABEL: type_signature = 0x6a7ee3d400662e88
-; CHECK: DW_AT_GNU_odr_signature [DW_FORM_data8] (0x200520c0d5b90eff)
+; CHECK-NOT: type_signature
+; CHECK-LABEL: type_signature = 0x1d02f3be30cc5688
 ; CHECK: DW_TAG_structure_type
 ; CHECK-NEXT: debug_str{{.*}}"bar"
 
 
 ; Check that we generate a hash for fluffy and the value.
-; CHECK-LABEL: type_signature = 0x139b2e1ea94afec7
-; CHECK: DW_AT_GNU_odr_signature [DW_FORM_data8]   (0x9a0124d5a0c21c52)
+; CHECK-NOT: type_signature
+; CHECK-LABEL: type_signature = 0xb04af47397402e77
+; CHECK-NOT: DW_AT_GNU_odr_signature [DW_FORM_data8]   (0x9a0124d5a0c21c52)
 ; CHECK: DW_TAG_namespace
 ; CHECK-NEXT: debug_str{{.*}}"echidna"
 ; CHECK: DW_TAG_namespace
@@ -67,34 +80,22 @@
 ; CHECK: DW_TAG_class_type
 ; CHECK-NEXT: debug_str{{.*}}"fluffy"
 
-; namespace and won't violate any ODR-ness.
-; CHECK-LABEL: type_signature = 0xc0d031d6449dbca7
-; CHECK: DW_TAG_type_unit
-; CHECK-NOT: NULL
-; We emit no hash for walrus since the type is contained in an anonymous
-; CHECK-NOT: DW_AT_GNU_odr_signature
-; CHECK: DW_TAG_structure_type
-; CHECK-NEXT: debug_str{{.*}}"walrus"
-; CHECK-NEXT: DW_AT_byte_size
-; CHECK-NEXT: DW_AT_decl_file
-; CHECK-NEXT: DW_AT_decl_line
-; CHECK: DW_TAG_subprogram
-
 ; Check that we generate a hash for wombat and the value, but not for the
 ; anonymous type contained within.
-; CHECK-LABEL: type_signature = 0x73776f130648b986
-; CHECK: DW_AT_GNU_odr_signature [DW_FORM_data8] (0x685bcc220141e9d7)
+; CHECK-NOT: type_signature
+; CHECK-LABEL: type_signature = 0xfd756cee88f8a118
+; CHECK-NOT: DW_AT_GNU_odr_signature [DW_FORM_data8] (0x685bcc220141e9d7)
 ; CHECK: DW_TAG_structure_type
 ; CHECK-NEXT: debug_str{{.*}}"wombat"
 
-; CHECK-LABEL: type_signature = 0xbf6fc40e82583d7c
+; CHECK-NOT: type_signature
+; CHECK-LABEL: type_signature = 0xe94f6d3843e62d6b
 ; CHECK: DW_TAG_type_unit
 ; CHECK-NOT: NULL
-; Check that we generate no ODR hash for the anonymous type nested inside 'wombat'
 ; CHECK-NOT: DW_AT_GNU_odr_signature
 ; CHECK: DW_TAG_structure_type
 ; The signature for the outer 'wombat' type
-; CHECK: DW_AT_signature [DW_FORM_ref_sig8] (0x73776f130648b986)
+; CHECK: DW_AT_signature [DW_FORM_ref_sig8] (0xfd756cee88f8a118)
 ; CHECK: DW_TAG_structure_type
 ; CHECK-NOT: DW_AT_name
 ; CHECK-NOT: DW_AT_GNU_odr_signature
@@ -107,18 +108,16 @@
 ; Don't emit pubtype entries for type DIEs in the compile unit that just indirect to a type unit.
 ; CHECK-NEXT: unit_size = [[CU_SIZE]]
 ; CHECK-NEXT: Offset Name
+; CHECK-NEXT: "walrus"
 ; Type unit for 'bar'
-; CHECK-NEXT: unit_size = 0x0000002b
+; CHECK-NEXT: unit_size = 0x00000023
 ; CHECK-NEXT: Offset Name
 ; CHECK-NEXT: "bar"
-; CHECK-NEXT: unit_size = 0x00000065
+; CHECK-NEXT: unit_size = 0x0000005d
 ; CHECK-NEXT: Offset Name
 ; CHECK-NEXT: "int"
 ; CHECK-NEXT: "echidna::capybara::mongoose::fluffy"
-; CHECK-NEXT: unit_size = 0x0000003b
-; CHECK-NEXT: Offset Name
-; CHECK-NEXT: "walrus"
-; CHECK-NEXT: unit_size = 0x00000042
+; CHECK-NEXT: unit_size = 0x0000003a
 ; CHECK-NEXT: Offset Name
 ; CHECK-NEXT: "wombat"
 ; CHECK-NEXT: unit_size = 0x0000004b





More information about the llvm-commits mailing list