[llvm] 2bddab2 - DebugInfo: Don't hash DIE offsets before they're computed

Sat Dec 25 16:09:42 PST 2021

Author: David Blaikie
Date: 2021-12-25T16:09:12-08:00
New Revision: 2bddab25dba8d4b0932dc2b6cacef13fcf8a0694

URL: https://github.com/llvm/llvm-project/commit/2bddab25dba8d4b0932dc2b6cacef13fcf8a0694
DIFF: https://github.com/llvm/llvm-project/commit/2bddab25dba8d4b0932dc2b6cacef13fcf8a0694.diff

LOG: DebugInfo: Don't hash DIE offsets before they're computed

Instead of hashing DIE offsets, hash DIE references the same as they
would be when used outside of a loclist - that is, deep hash the type on
first use, and hash the numbering on subsequent uses.

This does produce different hashes for different type references, where
it did not before (because we were hashing zero all the time - so it
didn't matter what type was referenced, the hash would be identical).

This also allows us to enforce that the DIE offset (& size) is not
queried before it is used (which came up while investigating another bug
recently).

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/DIE.h
    llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
    llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
    llvm/lib/CodeGen/AsmPrinter/DIEHash.h
    llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
    llvm/test/DebugInfo/X86/convert-loclist.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/DIE.h b/llvm/include/llvm/CodeGen/DIE.h
index 9e94c401bfae5..51320aea0327d 100644

--- a/llvm/include/llvm/CodeGen/DIE.h
+++ b/llvm/include/llvm/CodeGen/DIE.h
@@ -774,8 +774,16 @@ class DIE : IntrusiveBackListNode, public DIEValueList {
   unsigned getAbbrevNumber() const { return AbbrevNumber; }
   dwarf::Tag getTag() const { return Tag; }
   /// Get the compile/type unit relative offset of this DIE.
-  unsigned getOffset() const { return Offset; }
-  unsigned getSize() const { return Size; }
+  unsigned getOffset() const {
+    // A real Offset can't be zero because the unit headers are at offset zero.
+    assert(Offset && "Offset being queried before it's been computed.");
+    return Offset;
+  }
+  unsigned getSize() const {
+    // A real Size can't be zero because it includes the non-empty abbrev code.
+    assert(Size && "Size being queried before it's been ocmputed.");
+    return Size;
+  }
   bool hasChildren() const { return ForceChildren || !Children.empty(); }
   void setForceChildren(bool B) { ForceChildren = B; }
 

diff  --git a/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h b/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
index 5e7db1f2f76ca..7525e5865282b 100644
--- a/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
+++ b/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
@@ -33,6 +33,7 @@ class ByteStreamer {
   virtual void emitSLEB128(uint64_t DWord, const Twine &Comment = "") = 0;
   virtual void emitULEB128(uint64_t DWord, const Twine &Comment = "",
                            unsigned PadTo = 0) = 0;
+  virtual void emitDIERef(const DIE &D) = 0;
 };
 
 class APByteStreamer final : public ByteStreamer {
@@ -54,15 +55,21 @@ class APByteStreamer final : public ByteStreamer {
     AP.OutStreamer->AddComment(Comment);
     AP.emitULEB128(DWord, nullptr, PadTo);
   }
+  void emitDIERef(const DIE &D) override {
+    uint64_t Offset = D.getOffset();
+    static constexpr unsigned ULEB128PadSize = 4;
+    assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset wont fit");
+    emitULEB128(Offset, "", ULEB128PadSize);
+  }
 };
 
 class HashingByteStreamer final : public ByteStreamer {
  private:
   DIEHash &Hash;
  public:
- HashingByteStreamer(DIEHash &H) : Hash(H) {}
-  void emitInt8(uint8_t Byte, const Twine &Comment) override {
-    Hash.update(Byte);
+   HashingByteStreamer(DIEHash &H) : Hash(H) {}
+   void emitInt8(uint8_t Byte, const Twine &Comment) override {
+     Hash.update(Byte);
   }
   void emitSLEB128(uint64_t DWord, const Twine &Comment) override {
     Hash.addSLEB128(DWord);
@@ -71,6 +78,7 @@ class HashingByteStreamer final : public ByteStreamer {
                    unsigned PadTo) override {
     Hash.addULEB128(DWord);
   }
+  void emitDIERef(const DIE &D) override { Hash.hashRawTypeReference(D); }
 };
 
 class BufferByteStreamer final : public ByteStreamer {
@@ -115,9 +123,14 @@ class BufferByteStreamer final : public ByteStreamer {
       // with each other.
       for (size_t i = 1; i < Length; ++i)
         Comments.push_back("");
-
     }
   }
+  void emitDIERef(const DIE &D) override {
+    uint64_t Offset = D.getOffset();
+    static constexpr unsigned ULEB128PadSize = 4;
+    assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset wont fit");
+    emitULEB128(Offset, "", ULEB128PadSize);
+  }
 };
 
 }

diff  --git a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
index 5f4ee747fcca3..b7b26199956a9 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -207,6 +207,18 @@ void DIEHash::hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag,
   computeHash(Entry);
 }
 
+void DIEHash::hashRawTypeReference(const DIE &Entry) {
+  unsigned &DieNumber = Numbering[&Entry];
+  if (DieNumber) {
+    addULEB128('R');
+    addULEB128(DieNumber);
+    return;
+  }
+  DieNumber = Numbering.size();
+  addULEB128('T');
+  computeHash(Entry);
+}
+
 // Hash all of the values in a block like set of values. This assumes that
 // all of the data is going to be added as integers.
 void DIEHash::hashBlockData(const DIE::const_value_range &Values) {

diff  --git a/llvm/lib/CodeGen/AsmPrinter/DIEHash.h b/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
index 29e1da4c5d605..24a973b392715 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
@@ -62,6 +62,8 @@ class DIEHash {
   /// Encodes and adds \param Value to the hash as a SLEB128.
   void addSLEB128(int64_t Value);
 
+  void hashRawTypeReference(const DIE &Entry);
+
 private:
   /// Adds \param Str to the hash and includes a NULL byte.
   void addString(StringRef Str);

diff  --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 48134f1fd7747..b129aa1716698 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -2539,14 +2539,7 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
       if (Op.getDescription().Op[I] == Encoding::SizeNA)
         continue;
       if (Op.getDescription().Op[I] == Encoding::BaseTypeRef) {
-        uint64_t Offset =
-            CU->ExprRefedBaseTypes[Op.getRawOperand(I)].Die->getOffset();
-        assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset wont fit");
-        Streamer.emitULEB128(Offset, "", ULEB128PadSize);
-        // Make sure comments stay aligned.
-        for (unsigned J = 0; J < ULEB128PadSize; ++J)
-          if (Comment != End)
-            Comment++;
+        Streamer.emitDIERef(*CU->ExprRefedBaseTypes[Op.getRawOperand(I)].Die);
       } else {
         for (uint64_t J = Offset; J < Op.getOperandEndOffset(I); ++J)
           Streamer.emitInt8(Data.getData()[J], Comment != End ? *(Comment++) : "");

diff  --git a/llvm/test/DebugInfo/X86/convert-loclist.ll b/llvm/test/DebugInfo/X86/convert-loclist.ll
index d732840d4924e..56dede02c51d1 100644
--- a/llvm/test/DebugInfo/X86/convert-loclist.ll
+++ b/llvm/test/DebugInfo/X86/convert-loclist.ll
@@ -13,7 +13,7 @@
 ; often - add another IR file with a 
diff erent DW_OP_convert that's otherwise
 ; identical and demonstrate that they have 
diff erent DWO IDs.
 
-; SPLIT: 0x00000000: Compile Unit: {{.*}} DWO_id = 0xecf2563326b0bdd3
+; SPLIT: 0x00000000: Compile Unit: {{.*}} DWO_id = 0xa6edbf487b0a7acf
 
 ; Regression testing a fairly quirky bug where instead of hashing (see above),
 ; extra bytes would be emitted into the output assembly in no