[llvm] 2b74724 - [DWARFLinker] mark odr candidates inside the same object file.

Alexey Lapshin via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 28 09:49:06 PDT 2022


Author: Alexey Lapshin
Date: 2022-06-28T19:48:49+03:00
New Revision: 2b747241a6a0180c99dc4e46dbf62791080a9680

URL: https://github.com/llvm/llvm-project/commit/2b747241a6a0180c99dc4e46dbf62791080a9680
DIFF: https://github.com/llvm/llvm-project/commit/2b747241a6a0180c99dc4e46dbf62791080a9680.diff

LOG: [DWARFLinker] mark odr candidates inside the same object file.

This patch is extracted from D86539.

Current implementation of lookForDIEsToKeep() function skips types
duplications basing on the getCanonicalDIEOffset() data:

```
if (AttrSpec.Form != dwarf::DW_FORM_ref_addr && (UseOdr || IsModuleRef) &&
    Info.Ctxt &&
    Info.Ctxt != ReferencedCU->getInfo(Info.ParentIdx).Ctxt &&
    Info.Ctxt->getCanonicalDIEOffset() && isODRAttribute(AttrSpec.Attr))  <<<<<
  continue;
```

But that field is set after all compile units inside object file are processed:

```
for (auto &CurrentUnit : OptContext.CompileUnits)
  lookForDIEsToKeep(.., &CurrentUnit, ..);  // check CanonicalDIEOffset

DIECloner.cloneAllCompileUnits(); // set CanonicalDIEOffset
```

Thus, if the object file contains several compilation units - types would
not be deduplicated. The above solution works well for the case when the object file
contains only one compilation unit. But if the object file contains several compilation
units then types would not be deduplicated between these compilation units.

This patch changes the algorithm so that types were deduplicated between
compilation units from the same object file.

It produces binary incompatible output for the cases when several compilation units
are located inside the same object file.

Reviewed By: aprantl

Differential Revision: https://reviews.llvm.org/D125469

Added: 
    llvm/test/tools/dsymutil/X86/odr-two-units-in-single-file.test

Modified: 
    llvm/include/llvm/DWARFLinker/DWARFLinker.h
    llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h
    llvm/include/llvm/DWARFLinker/DWARFLinkerDeclContext.h
    llvm/lib/DWARFLinker/DWARFLinker.cpp
    llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/DWARFLinker/DWARFLinker.h b/llvm/include/llvm/DWARFLinker/DWARFLinker.h
index 2a1d31c0e72ad..0b2e033bd97a3 100644
--- a/llvm/include/llvm/DWARFLinker/DWARFLinker.h
+++ b/llvm/include/llvm/DWARFLinker/DWARFLinker.h
@@ -365,6 +365,8 @@ class DWARFLinker {
     /// Given a DIE, update its incompleteness based on whether the DIEs it
     /// references are incomplete.
     UpdateRefIncompleteness,
+    /// Given a DIE, mark it as ODR Canonical if applicable.
+    MarkODRCanonicalDie,
   };
 
   /// This class represents an item in the work list. The type defines what kind
@@ -464,6 +466,10 @@ class DWARFLinker {
                             const DWARFFile &File,
                             SmallVectorImpl<WorklistItem> &Worklist);
 
+  /// Mark context corresponding to the specified \p Die as having canonical
+  /// die, if applicable.
+  void markODRCanonicalDie(const DWARFDie &Die, CompileUnit &CU);
+
   /// \defgroup FindRootDIEs Find DIEs corresponding to Address map entries.
   ///
   /// @{

diff  --git a/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h b/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h
index 41a2d1ffb1ca5..788275782235c 100644
--- a/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h
+++ b/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h
@@ -74,6 +74,12 @@ class CompileUnit {
 
     /// Does DIE transitively refer an incomplete decl?
     bool Incomplete : 1;
+
+    /// Is DIE in the clang module scope?
+    bool InModuleScope : 1;
+
+    /// Is ODR marking done?
+    bool ODRMarkingDone : 1;
   };
 
   CompileUnit(DWARFUnit &OrigUnit, unsigned ID, bool CanUseODR,

diff  --git a/llvm/include/llvm/DWARFLinker/DWARFLinkerDeclContext.h b/llvm/include/llvm/DWARFLinker/DWARFLinkerDeclContext.h
index 91f65144952da..fb02b0fc1b4df 100644
--- a/llvm/include/llvm/DWARFLinker/DWARFLinkerDeclContext.h
+++ b/llvm/include/llvm/DWARFLinker/DWARFLinkerDeclContext.h
@@ -18,6 +18,7 @@
 #include "llvm/DebugInfo/DWARF/DWARFDie.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
+#include <atomic>
 
 namespace llvm {
 
@@ -91,6 +92,10 @@ class DeclContext {
 
   bool setLastSeenDIE(CompileUnit &U, const DWARFDie &Die);
 
+  void setHasCanonicalDIE() { HasCanonicalDIE = true; }
+
+  bool hasCanonicalDIE() const { return HasCanonicalDIE; }
+
   uint32_t getCanonicalDIEOffset() const { return CanonicalDIEOffset; }
   void setCanonicalDIEOffset(uint32_t Offset) { CanonicalDIEOffset = Offset; }
 
@@ -112,7 +117,8 @@ class DeclContext {
   const DeclContext &Parent;
   DWARFDie LastSeenDIE;
   uint32_t LastSeenCompileUnitID = 0;
-  uint32_t CanonicalDIEOffset = 0;
+  std::atomic<uint32_t> CanonicalDIEOffset = {0};
+  bool HasCanonicalDIE = false;
 };
 
 /// This class gives a tree-like API to the DenseMap that stores the

diff  --git a/llvm/lib/DWARFLinker/DWARFLinker.cpp b/llvm/lib/DWARFLinker/DWARFLinker.cpp
index e4fb7b22806a2..50c52190c1f61 100644
--- a/llvm/lib/DWARFLinker/DWARFLinker.cpp
+++ b/llvm/lib/DWARFLinker/DWARFLinker.cpp
@@ -361,16 +361,16 @@ static bool analyzeContextInfo(
     }
 
     Info.ParentIdx = Current.ParentIdx;
-    bool InClangModule = CU.isClangModule() || Current.InImportedModule;
-    if (CU.hasODR() || InClangModule) {
+    Info.InModuleScope = CU.isClangModule() || Current.InImportedModule;
+    if (CU.hasODR() || Info.InModuleScope) {
       if (Current.Context) {
         auto PtrInvalidPair = Contexts.getChildDeclContext(
-            *Current.Context, Current.Die, CU, InClangModule);
+            *Current.Context, Current.Die, CU, Info.InModuleScope);
         Current.Context = PtrInvalidPair.getPointer();
         Info.Ctxt =
             PtrInvalidPair.getInt() ? nullptr : PtrInvalidPair.getPointer();
         if (Info.Ctxt)
-          Info.Ctxt->setDefinedInClangModule(InClangModule);
+          Info.Ctxt->setDefinedInClangModule(Info.InModuleScope);
       } else
         Info.Ctxt = Current.Context = nullptr;
     }
@@ -616,6 +616,27 @@ void DWARFLinker::lookForChildDIEsToKeep(
   }
 }
 
+static bool isODRCanonicalCandidate(const DWARFDie &Die, CompileUnit &CU) {
+  CompileUnit::DIEInfo &Info = CU.getInfo(Die);
+
+  if (!Info.Ctxt || (Die.getTag() == dwarf::DW_TAG_namespace))
+    return false;
+
+  if (!CU.hasODR() && !Info.InModuleScope)
+    return false;
+
+  return !Info.Incomplete && Info.Ctxt != CU.getInfo(Info.ParentIdx).Ctxt;
+}
+
+void DWARFLinker::markODRCanonicalDie(const DWARFDie &Die, CompileUnit &CU) {
+  CompileUnit::DIEInfo &Info = CU.getInfo(Die);
+
+  Info.ODRMarkingDone = true;
+  if (Info.Keep && isODRCanonicalCandidate(Die, CU) &&
+      !Info.Ctxt->hasCanonicalDIE())
+    Info.Ctxt->setHasCanonicalDIE();
+}
+
 /// Look at DIEs referenced by the given DIE and decide whether they should be
 /// kept. All DIEs referenced though attributes should be kept.
 void DWARFLinker::lookForRefDIEsToKeep(
@@ -645,8 +666,6 @@ void DWARFLinker::lookForRefDIEsToKeep(
     if (auto RefDie =
             resolveDIEReference(File, Units, Val, Die, ReferencedCU)) {
       CompileUnit::DIEInfo &Info = ReferencedCU->getInfo(RefDie);
-      bool IsModuleRef = Info.Ctxt && Info.Ctxt->getCanonicalDIEOffset() &&
-                         Info.Ctxt->isDefinedInClangModule();
       // If the referenced DIE has a DeclContext that has already been
       // emitted, then do not keep the one in this CU. We'll link to
       // the canonical DIE in cloneDieReferenceAttribute.
@@ -657,15 +676,14 @@ void DWARFLinker::lookForRefDIEsToKeep(
       //
       // FIXME: compatibility with dsymutil-classic. There is no
       // reason not to unique ref_addr references.
-      if (AttrSpec.Form != dwarf::DW_FORM_ref_addr && (UseOdr || IsModuleRef) &&
-          Info.Ctxt &&
-          Info.Ctxt != ReferencedCU->getInfo(Info.ParentIdx).Ctxt &&
-          Info.Ctxt->getCanonicalDIEOffset() && isODRAttribute(AttrSpec.Attr))
+      if (AttrSpec.Form != dwarf::DW_FORM_ref_addr &&
+          isODRAttribute(AttrSpec.Attr) && Info.Ctxt &&
+          Info.Ctxt->hasCanonicalDIE())
         continue;
 
       // Keep a module forward declaration if there is no definition.
       if (!(isODRAttribute(AttrSpec.Attr) && Info.Ctxt &&
-            Info.Ctxt->getCanonicalDIEOffset()))
+            Info.Ctxt->hasCanonicalDIE()))
         Info.Prune = false;
       ReferencedDIEs.emplace_back(RefDie, *ReferencedCU);
     }
@@ -756,6 +774,9 @@ void DWARFLinker::lookForDIEsToKeep(AddressesMap &AddressesMap,
       lookForParentDIEsToKeep(Current.AncestorIdx, Current.CU, Current.Flags,
                               Worklist);
       continue;
+    case WorklistItemType::MarkODRCanonicalDie:
+      markODRCanonicalDie(Current.Die, Current.CU);
+      continue;
     case WorklistItemType::LookForDIEsToKeep:
       break;
     }
@@ -778,6 +799,16 @@ void DWARFLinker::lookForDIEsToKeep(AddressesMap &AddressesMap,
       Current.Flags = shouldKeepDIE(AddressesMap, Ranges, Current.Die, File,
                                     Current.CU, MyInfo, Current.Flags);
 
+    // We need to mark context for the canonical die in the end of normal
+    // traversing(not TF_DependencyWalk) or after normal traversing if die
+    // was not marked as kept.
+    if (!(Current.Flags & TF_DependencyWalk) ||
+        (MyInfo.ODRMarkingDone && !MyInfo.Keep)) {
+      if (Current.CU.hasODR() || MyInfo.InModuleScope)
+        Worklist.emplace_back(Current.Die, Current.CU,
+                              WorklistItemType::MarkODRCanonicalDie);
+    }
+
     // Finish by looking for child DIEs. Because of the LIFO worklist we need
     // to schedule that work before any subsequent items are added to the
     // worklist.
@@ -845,7 +876,7 @@ void DWARFLinker::assignAbbrev(DIEAbbrev &Abbrev) {
 
 unsigned DWARFLinker::DIECloner::cloneStringAttribute(
     DIE &Die, AttributeSpec AttrSpec, const DWARFFormValue &Val,
-    const DWARFUnit &U, OffsetsStringPool &StringPool, AttributesInfo &Info) {
+    const DWARFUnit &, OffsetsStringPool &StringPool, AttributesInfo &Info) {
   Optional<const char *> String = dwarf::toString(Val);
   if (!String)
     return 0;
@@ -875,7 +906,6 @@ unsigned DWARFLinker::DIECloner::cloneDieReferenceAttribute(
 
   DIE *NewRefDie = nullptr;
   CompileUnit *RefUnit = nullptr;
-  DeclContext *Ctxt = nullptr;
 
   DWARFDie RefDie =
       Linker.resolveDIEReference(File, CompileUnits, Val, InputDIE, RefUnit);
@@ -888,14 +918,14 @@ unsigned DWARFLinker::DIECloner::cloneDieReferenceAttribute(
 
   // If we already have emitted an equivalent DeclContext, just point
   // at it.
-  if (isODRAttribute(AttrSpec.Attr)) {
-    Ctxt = RefInfo.Ctxt;
-    if (Ctxt && Ctxt->getCanonicalDIEOffset()) {
-      DIEInteger Attr(Ctxt->getCanonicalDIEOffset());
-      Die.addValue(DIEAlloc, dwarf::Attribute(AttrSpec.Attr),
-                   dwarf::DW_FORM_ref_addr, Attr);
-      return U.getRefAddrByteSize();
-    }
+  if (isODRAttribute(AttrSpec.Attr) && RefInfo.Ctxt &&
+      RefInfo.Ctxt->getCanonicalDIEOffset()) {
+    assert(RefInfo.Ctxt->hasCanonicalDIE() &&
+           "Offset to canonical die is set, but context is not marked");
+    DIEInteger Attr(RefInfo.Ctxt->getCanonicalDIEOffset());
+    Die.addValue(DIEAlloc, dwarf::Attribute(AttrSpec.Attr),
+                 dwarf::DW_FORM_ref_addr, Attr);
+    return U.getRefAddrByteSize();
   }
 
   if (!RefInfo.Clone) {
@@ -925,7 +955,7 @@ unsigned DWARFLinker::DIECloner::cloneDieReferenceAttribute(
       // A forward reference. Note and fixup later.
       Attr = 0xBADDEF;
       Unit.noteForwardReference(
-          NewRefDie, RefUnit, Ctxt,
+          NewRefDie, RefUnit, RefInfo.Ctxt,
           Die.addValue(DIEAlloc, dwarf::Attribute(AttrSpec.Attr),
                        dwarf::DW_FORM_ref_addr, DIEInteger(Attr)));
     }
@@ -1356,10 +1386,10 @@ DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE,
 
   assert(Die->getTag() == InputDIE.getTag());
   Die->setOffset(OutOffset);
-  if ((Unit.hasODR() || Unit.isClangModule()) && !Info.Incomplete &&
-      Die->getTag() != dwarf::DW_TAG_namespace && Info.Ctxt &&
-      Info.Ctxt != Unit.getInfo(Info.ParentIdx).Ctxt &&
-      !Info.Ctxt->getCanonicalDIEOffset()) {
+  if (isODRCanonicalCandidate(InputDIE, Unit) && Info.Ctxt &&
+      (Info.Ctxt->getCanonicalDIEOffset() == 0)) {
+    if (!Info.Ctxt->hasCanonicalDIE())
+      Info.Ctxt->setHasCanonicalDIE();
     // We are about to emit a DIE that is the root of its own valid
     // DeclContext tree. Make the current offset the canonical offset
     // for this context.

diff  --git a/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp b/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp
index acecb1788d104..e9e8be7fd0083 100644
--- a/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp
+++ b/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp
@@ -90,9 +90,11 @@ void CompileUnit::fixupForwardReferences() {
     PatchLocation Attr;
     DeclContext *Ctxt;
     std::tie(RefDie, RefUnit, Ctxt, Attr) = Ref;
-    if (Ctxt && Ctxt->getCanonicalDIEOffset())
+    if (Ctxt && Ctxt->hasCanonicalDIE()) {
+      assert(Ctxt->getCanonicalDIEOffset() &&
+             "Canonical die offset is not set");
       Attr.set(Ctxt->getCanonicalDIEOffset());
-    else
+    } else
       Attr.set(RefDie->getOffset() + RefUnit->getStartOffset());
   }
 }

diff  --git a/llvm/test/tools/dsymutil/X86/odr-two-units-in-single-file.test b/llvm/test/tools/dsymutil/X86/odr-two-units-in-single-file.test
new file mode 100644
index 0000000000000..c4c833700fefd
--- /dev/null
+++ b/llvm/test/tools/dsymutil/X86/odr-two-units-in-single-file.test
@@ -0,0 +1,200 @@
+## This test checks debug info for the types located into the
+## 
diff erent compilation units from the single object file.
+## Type definition for the "class1" should be removed from the
+## second compilation unit.
+
+# RUN: yaml2obj %s -o %t.o
+# RUN: echo '---' > %t2.map
+# RUN: echo "triple:          'x86_64-apple-darwin'" >> %t2.map
+# RUN: echo 'objects:'  >> %t2.map
+# RUN: echo " -  filename: '%t.o'" >> %t2.map
+# RUN: echo '    symbols:' >> %t2.map
+# RUN: echo '      - { sym: __Z3foov, objAddr: 0x0, binAddr: 0x10000, size: 0x10 }' >> %t2.map
+# RUN: echo '...' >> %t2.map
+# RUN: dsymutil -y %t2.map -f -o - | llvm-dwarfdump -a - | FileCheck %s
+
+# CHECK: file format Mach-O 64-bit x86-64
+# CHECK: .debug_info contents:
+# CHECK: Compile Unit:
+# CHECK: DW_TAG_compile_unit
+# CHECK: DW_AT_name{{.*}}"CU1"
+# CHECK: DW_TAG_class_type{{.*[[:space:]].*}}DW_AT_name{{.*}}"class1"
+# CHECK: DW_TAG_variable
+# CHECK: DW_AT_name{{.*}}"var1"
+# CHECK: DW_AT_const_value
+# CHECK: DW_AT_type (0x00000000[[CLASS1:[0-9a-f]*]]
+
+# CHECK: Compile Unit:
+# CHECK: DW_TAG_compile_unit
+# CHECK: DW_AT_name{{.*}}"CU2"
+# CHECK-NOT: DW_TAG_class_type
+# CHECK-NOT: "class1"
+# CHECK: DW_TAG_variable
+# CHECK: DW_AT_name{{.*}}"var2"
+# CHECK: DW_AT_const_value
+# CHECK: DW_AT_type (0x00000000[[CLASS1]]
+
+
+
+--- !mach-o
+FileHeader:
+  magic:      0xFEEDFACF
+  cputype:    0x01000007
+  cpusubtype: 0x00000003
+  filetype:   0x00000001
+  ncmds:      2
+  sizeofcmds: 376
+  flags:      0x00002000
+  reserved:   0x00000000
+LoadCommands:
+  - cmd:      LC_SEGMENT_64
+    cmdsize:  232
+    segname:  ''
+    vmaddr:   0x00
+    vmsize:   0x300
+    fileoff:  0x300
+    filesize: 0x300
+    maxprot:  7
+    initprot: 7
+    nsects:   2
+    flags:    0
+    Sections:
+      - sectname:  __debug_abbrev
+        segname:   __DWARF
+        addr:      0x000000000000000F
+        size:      0x3c
+        offset:    0x00000380
+        align:     0
+        reloff:    0x00000000
+        nreloc:    0
+        flags:     0x02000000
+        reserved1: 0x00000000
+        reserved2: 0x00000000
+        reserved3: 0x00000000
+      - sectname:  __debug_info
+        segname:   __DWARF
+        addr:      0x000000000000100
+        size:      0x62
+        offset:    0x00000410
+        align:     0
+        reloff:    0x00000600
+        nreloc:    1
+        flags:     0x02000000
+        reserved1: 0x00000000
+        reserved2: 0x00000000
+        reserved3: 0x00000000
+        relocations:
+          - address:         0x1FC
+            symbolnum:       1
+            pcrel:           true
+            length:          3
+            extern:          true
+            type:            0
+            scattered:       false
+            value:           0
+  - cmd:             LC_SYMTAB
+    cmdsize:         24
+    symoff:          0x700
+    nsyms:           2
+    stroff:          0x720
+    strsize:         10
+LinkEditData:
+  NameList:
+    - n_strx:          1
+      n_type:          0x0F
+      n_sect:          1
+      n_desc:          0
+      n_value:         0
+    - n_strx:          1
+      n_type:          0x0F
+      n_sect:          1
+      n_desc:          0
+      n_value:         0
+  StringTable:
+    - ''
+    - '__Z3foov'
+    - ''
+DWARF:
+  debug_abbrev:
+    - Table:
+      - Tag:      DW_TAG_compile_unit
+        Children: DW_CHILDREN_yes
+        Attributes:
+          - Attribute: DW_AT_producer
+            Form:      DW_FORM_string
+          - Attribute: DW_AT_language
+            Form:      DW_FORM_data2
+          - Attribute: DW_AT_name
+            Form:      DW_FORM_string
+      - Tag:      DW_TAG_class_type
+        Children: DW_CHILDREN_no
+        Attributes:
+          - Attribute: DW_AT_name
+            Form:      DW_FORM_string
+      - Tag:      DW_TAG_variable
+        Children: DW_CHILDREN_no
+        Attributes:
+          - Attribute: DW_AT_name
+            Form:      DW_FORM_string
+          - Attribute: DW_AT_const_value
+            Form:      DW_FORM_data4
+          - Attribute: DW_AT_type
+            Form:      DW_FORM_ref4
+    - Table:
+      - Tag:      DW_TAG_compile_unit
+        Children: DW_CHILDREN_yes
+        Attributes:
+          - Attribute: DW_AT_producer
+            Form:      DW_FORM_string
+          - Attribute: DW_AT_language
+            Form:      DW_FORM_data2
+          - Attribute: DW_AT_name
+            Form:      DW_FORM_string
+      - Tag:      DW_TAG_class_type
+        Children: DW_CHILDREN_no
+        Attributes:
+          - Attribute: DW_AT_name
+            Form:      DW_FORM_string
+      - Tag:      DW_TAG_variable
+        Children: DW_CHILDREN_no
+        Attributes:
+          - Attribute: DW_AT_name
+            Form:      DW_FORM_string
+          - Attribute: DW_AT_const_value
+            Form:      DW_FORM_data4
+          - Attribute: DW_AT_type
+            Form:      DW_FORM_ref4
+  debug_info:
+    - Version: 4
+      Entries:
+        - AbbrCode: 1
+          Values:
+            - CStr: by_hand
+            - Value:  0x04
+            - CStr: CU1
+        - AbbrCode: 2
+          Values:
+            - CStr: class1
+        - AbbrCode: 3
+          Values:
+            - CStr: var1
+            - Value:  0x00000000
+            - Value:  0x0000001a
+        - AbbrCode: 0
+    - Version: 4
+      Entries:
+        - AbbrCode: 1
+          Values:
+            - CStr: by_hand
+            - Value:  0x04
+            - CStr: CU2
+        - AbbrCode: 2
+          Values:
+            - CStr: class1
+        - AbbrCode: 3
+          Values:
+            - CStr: var2
+            - Value:  0x00000000
+            - Value:  0x0000001a
+        - AbbrCode: 0
+...


        


More information about the llvm-commits mailing list