[llvm] r188028 - Move hash computation code into a separate class and file.

Eric Christopher echristo at gmail.com
Sat Aug 10 13:50:34 PDT 2013


On Sat, Aug 10, 2013 at 1:42 PM, David Blaikie <dblaikie at gmail.com> wrote:
>
>
>
> On Thu, Aug 8, 2013 at 4:45 PM, Eric Christopher <echristo at gmail.com> wrote:
>>
>> Author: echristo
>> Date: Thu Aug  8 18:45:55 2013
>> New Revision: 188028
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=188028&view=rev
>> Log:
>> Move hash computation code into a separate class and file.
>>
>> No functional change intended.
>>
>> Added:
>>     llvm/trunk/lib/CodeGen/AsmPrinter/DIEHash.cpp
>>     llvm/trunk/lib/CodeGen/AsmPrinter/DIEHash.h
>> Modified:
>>     llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
>>
>> Added: llvm/trunk/lib/CodeGen/AsmPrinter/DIEHash.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DIEHash.cpp?rev=188028&view=auto
>>
>> ==============================================================================
>> --- llvm/trunk/lib/CodeGen/AsmPrinter/DIEHash.cpp (added)
>> +++ llvm/trunk/lib/CodeGen/AsmPrinter/DIEHash.cpp Thu Aug  8 18:45:55 2013
>> @@ -0,0 +1,136 @@
>> +//===-- llvm/CodeGen/DIEHash.cpp - Dwarf Hashing Framework ----------------===//
>> +//
>> +//                     The LLVM Compiler Infrastructure
>> +//
>> +// This file is distributed under the University of Illinois Open Source
>> +// License. See LICENSE.TXT for details.
>> +//
>>
>> +//===----------------------------------------------------------------------===//
>> +//
>> +// This file contains support for DWARF4 hashing of DIEs.
>> +//
>>
>> +//===----------------------------------------------------------------------===//
>> +
>> +#define DEBUG_TYPE "dwarfdebug"
>> +
>> +#include "DIE.h"
>> +#include "DIEHash.h"
>> +#include "DwarfCompileUnit.h"
>> +#include "llvm/ADT/ArrayRef.h"
>> +#include "llvm/ADT/StringRef.h"
>> +#include "llvm/Support/Debug.h"
>> +#include "llvm/Support/Dwarf.h"
>> +#include "llvm/Support/Endian.h"
>> +#include "llvm/Support/MD5.h"
>> +#include "llvm/Support/raw_ostream.h"
>> +
>> +using namespace llvm;
>> +
>> +/// \brief Grabs the string in whichever attribute is passed in and returns
>> +/// a reference to it.
>> +static StringRef getDIEStringAttr(DIE *Die, uint16_t Attr) {
>> +  const SmallVectorImpl<DIEValue *> &Values = Die->getValues();
>> +  const DIEAbbrev &Abbrevs = Die->getAbbrev();
>> +
>> +  // Iterate through all the attributes until we find the one we're
>> +  // looking for, if we can't find it return an empty string.
>> +  for (size_t i = 0; i < Values.size(); ++i) {
>> +    if (Abbrevs.getData()[i].getAttribute() == Attr) {
>> +      DIEValue *V = Values[i];
>> +      assert(isa<DIEString>(V) && "String requested. Not a string.");
>
>
> cast already asserts when !isa - so this assert is unnecessary
>
>>
>> +      DIEString *S = cast<DIEString>(V);
>> +      return S->getString();
>> +    }
>> +  }
>> +  return StringRef("");
>> +}
>> +
>> +/// \brief Adds the string in \p Str to the hash. This also hashes
>> +/// a trailing NULL with the string.
>> +void DIEHash::addString(StringRef Str) {
>> +  DEBUG(dbgs() << "Adding string " << Str << " to hash.\n");
>> +  Hash.update(Str);
>> +  Hash.update(makeArrayRef((uint8_t)'\0'));
>> +}
>> +
>> +// FIXME: The LEB128 routines are copied and only slightly modified out of
>> +// LEB128.h.
>> +
>> +/// \brief Adds the unsigned in \p Value to the hash encoded as a ULEB128.
>> +void DIEHash::addULEB128(uint64_t Value) {
>> +  DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n");
>> +  do {
>> +    uint8_t Byte = Value & 0x7f;
>> +    Value >>= 7;
>> +    if (Value != 0)
>> +      Byte |= 0x80; // Mark this byte to show that more bytes will follow.
>> +    Hash.update(Byte);
>> +  } while (Value != 0);
>> +}
>> +
>> +/// \brief Including \p Parent adds the context of Parent to the hash..
>> +void DIEHash::addParentContext(DIE *Parent) {
>> +
>> +  DEBUG(dbgs() << "Adding parent context to hash...\n");
>> +
>> +  // [7.27.2] For each surrounding type or namespace beginning with the
>> +  // outermost such construct...
>> +  SmallVector<DIE *, 1> Parents;
>> +  while (Parent->getTag() != dwarf::DW_TAG_compile_unit) {
>> +    Parents.push_back(Parent);
>> +    Parent = Parent->getParent();
>> +  }
>> +
>> +  // Reverse iterate over our list to go from the outermost construct to the
>> +  // innermost.
>> +  for (SmallVectorImpl<DIE *>::reverse_iterator I = Parents.rbegin(),
>> +                                                E = Parents.rend();
>> +       I != E; ++I) {
>> +    DIE *Die = *I;
>> +
>> +    // ... Append the letter "C" to the sequence...
>> +    addULEB128('C');
>> +
>> +    // ... Followed by the DWARF tag of the construct...
>> +    addULEB128(Die->getTag());
>> +
>> +    // ... Then the name, taken from the DW_AT_name attribute.
>> +    StringRef Name = getDIEStringAttr(Die, dwarf::DW_AT_name);
>> +    DEBUG(dbgs() << "... adding context: " << Name << "\n");
>> +    if (!Name.empty())
>> +      addString(Name);
>> +  }
>> +}
>> +
>> +/// This is based on the type signature computation given in section 7.27 of the
>> +/// DWARF4 standard. It is the md5 hash of a flattened description of the DIE
>> +/// with
>> +/// the exception that we are hashing only the context and the name of the type.
>> +uint64_t DIEHash::computeDIEODRSignature(DIE *Die) {
>> +
>> +  // Add the contexts to the hash. We won't be computing the ODR hash for
>> +  // function local types so it's safe to use the generic context hashing
>> +  // algorithm here.
>> +  // FIXME: If we figure out how to account for linkage in some way we could
>> +  // actually do this with a slight modification to the parent hash algorithm.
>> +  DIE *Parent = Die->getParent();
>> +  if (Parent)
>> +    addParentContext(Parent);
>> +
>> +  // Add the current DIE information.
>> +
>> +  // Add the DWARF tag of the DIE.
>> +  addULEB128(Die->getTag());
>> +
>> +  // Add the name of the type to the hash.
>> +  addString(getDIEStringAttr(Die, dwarf::DW_AT_name));
>> +
>> +  // Now get the result.
>> +  MD5::MD5Result Result;
>> +  Hash.final(Result);
>> +
>> +  // ... take the least significant 8 bytes and return those. Our MD5
>> +  // implementation always returns its results in little endian, swap bytes
>> +  // appropriately.
>> +  return *reinterpret_cast<support::ulittle64_t *>(Result + 8);
>> +}
>>
>> Added: llvm/trunk/lib/CodeGen/AsmPrinter/DIEHash.h
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DIEHash.h?rev=188028&view=auto
>>
>> ==============================================================================
>> --- llvm/trunk/lib/CodeGen/AsmPrinter/DIEHash.h (added)
>> +++ llvm/trunk/lib/CodeGen/AsmPrinter/DIEHash.h Thu Aug  8 18:45:55 2013
>> @@ -0,0 +1,46 @@
>> +//===-- llvm/CodeGen/DIEHash.h - Dwarf Hashing Framework -------*- C++ -*--===//
>> +//
>> +//                     The LLVM Compiler Infrastructure
>> +//
>> +// This file is distributed under the University of Illinois Open Source
>> +// License. See LICENSE.TXT for details.
>> +//
>>
>> +//===----------------------------------------------------------------------===//
>> +//
>> +// This file contains support for DWARF4 hashing of DIEs.
>> +//
>>
>> +//===----------------------------------------------------------------------===//
>> +
>> +#include "llvm/Support/MD5.h"
>> +
>> +namespace llvm {
>> +
>> +class CompileUnit;
>> +
>> +/// \brief An object containing the capability of hashing and adding hash
>> +/// attributes onto a DIE.
>> +class DIEHash {
>> +public:
>> +  /// \brief Initializes. The hash is default initialized.
>> +  DIEHash() {}
>
>
> You could leave this ctor implicit. Though I'm not sure about this whole
> class as an abstraction - it has one stateless function. Why isn't that just
> a free function?
>

Because it's going to need some state here shortly.

-eric

>>
>> +
>> +  /// \brief Computes the ODR signature
>> +  uint64_t computeDIEODRSignature(DIE *Die);
>>
>> +
>> +  // Helper routines to process parts of a DIE.
>> + private:
>> +  /// \brief Adds the parent context of \param Die to the hash.
>> +  void addParentContext(DIE *Die);
>> +
>> +  // Routines that add DIEValues to the hash.
>> +private:
>> +  /// \brief Encodes and adds \param Value to the hash as a ULEB128.
>> +  void addULEB128(uint64_t Value);
>> +
>> +  /// \brief Adds \param Str to the hash and includes a NULL byte.
>> +  void addString(StringRef Str);
>> +
>> +private:
>> +  MD5 Hash;
>> +};
>> +}
>>
>> Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=188028&r1=188027&r2=188028&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original)
>> +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Thu Aug  8 18:45:55 2013
>> @@ -14,6 +14,7 @@
>>  #define DEBUG_TYPE "dwarfdebug"
>>  #include "DwarfDebug.h"
>>  #include "DIE.h"
>> +#include "DIEHash.h"
>>  #include "DwarfAccelTable.h"
>>  #include "DwarfCompileUnit.h"
>>  #include "llvm/ADT/STLExtras.h"
>> @@ -962,8 +963,7 @@ void DwarfDebug::collectDeadVariables()
>>    DeleteContainerSeconds(DeadFnScopeMap);
>>  }
>>
>> -// Type Signature [7.27] computation code.
>> -typedef ArrayRef<uint8_t> HashValue;
>> +// Type Signature [7.27] and ODR Hash code.
>>
>>  /// \brief Grabs the string in whichever attribute is passed in and returns
>>  /// a reference to it. Returns "" if the attribute doesn't exist.
>> @@ -976,100 +976,6 @@ static StringRef getDIEStringAttr(DIE *D
>>    return StringRef("");
>>  }
>>
>> -/// \brief Adds the string in \p Str to the hash in \p Hash. This also hashes
>> -/// a trailing NULL with the string.
>> -static void addStringToHash(MD5 &Hash, StringRef Str) {
>> -  DEBUG(dbgs() << "Adding string " << Str << " to hash.\n");
>> -  Hash.update(Str);
>> -  Hash.update(makeArrayRef((uint8_t)'\0'));
>> -}
>> -
>> -// FIXME: These are copied and only slightly modified out of LEB128.h.
>> -
>> -/// \brief Adds the unsigned in \p N to the hash in \p Hash. This also encodes
>> -/// the unsigned as a ULEB128.
>> -static void addULEB128ToHash(MD5 &Hash, uint64_t Value) {
>> -  DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n");
>> -  do {
>> -    uint8_t Byte = Value & 0x7f;
>> -    Value >>= 7;
>> -    if (Value != 0)
>> -      Byte |= 0x80; // Mark this byte to show that more bytes will follow.
>> -    Hash.update(Byte);
>> -  } while (Value != 0);
>> -}
>> -
>> -/// \brief Including \p Parent adds the context of Parent to \p Hash.
>> -static void addParentContextToHash(MD5 &Hash, DIE *Parent) {
>> -
>> -  DEBUG(dbgs() << "Adding parent context to hash...\n");
>> -
>> -  // [7.27.2] For each surrounding type or namespace beginning with the
>> -  // outermost such construct...
>> -  SmallVector<DIE *, 1> Parents;
>> -  while (Parent->getTag() != dwarf::DW_TAG_compile_unit) {
>> -    Parents.push_back(Parent);
>> -    Parent = Parent->getParent();
>> -  }
>> -
>> -  // Reverse iterate over our list to go from the outermost construct to the
>> -  // innermost.
>> -  for (SmallVectorImpl<DIE *>::reverse_iterator I = Parents.rbegin(),
>> -                                                E = Parents.rend();
>> -       I != E; ++I) {
>> -    DIE *Die = *I;
>> -
>> -    // ... Append the letter "C" to the sequence...
>> -    addULEB128ToHash(Hash, 'C');
>> -
>> -    // ... Followed by the DWARF tag of the construct...
>> -    addULEB128ToHash(Hash, Die->getTag());
>> -
>> -    // ... Then the name, taken from the DW_AT_name attribute.
>> -    StringRef Name = getDIEStringAttr(Die, dwarf::DW_AT_name);
>> -    DEBUG(dbgs() << "... adding context: " << Name << "\n");
>> -    if (!Name.empty())
>> -      addStringToHash(Hash, Name);
>> -  }
>> -}
>> -
>> -/// This is based on the type signature computation given in section 7.27 of the
>> -/// DWARF4 standard. It is the md5 hash of a flattened description of the DIE with
>> -/// the exception that we are hashing only the context and the name of the type.
>> -static void addDIEODRSignature(MD5 &Hash, CompileUnit *CU, DIE *Die) {
>> -
>> -  // Add the contexts to the hash. We won't be computing the ODR hash for
>> -  // function local types so it's safe to use the generic context hashing
>> -  // algorithm here.
>> -  // FIXME: If we figure out how to account for linkage in some way we could
>> -  // actually do this with a slight modification to the parent hash algorithm.
>> -  DIE *Parent = Die->getParent();
>> -  if (Parent)
>> -    addParentContextToHash(Hash, Parent);
>> -
>> -  // Add the current DIE information.
>> -
>> -  // Add the DWARF tag of the DIE.
>> -  addULEB128ToHash(Hash, Die->getTag());
>> -
>> -  // Add the name of the type to the hash.
>> -  addStringToHash(Hash, getDIEStringAttr(Die, dwarf::DW_AT_name));
>> -
>> -  // Now get the result.
>> -  MD5::MD5Result Result;
>> -  Hash.final(Result);
>> -
>> -  // ... take the least significant 8 bytes and store those as the attribute.
>> -  // Our MD5 implementation always returns its results in little endian, swap
>> -  // bytes appropriately.
>> -  uint64_t Signature = *reinterpret_cast<support::ulittle64_t *>(Result + 8);
>> -
>> -  // FIXME: This should be added onto the type unit, not the type, but this
>> -  // works as an intermediate stage.
>> -  CU->addUInt(Die, dwarf::DW_AT_GNU_odr_signature, dwarf::DW_FORM_data8,
>> -              Signature);
>> -}
>> -
>>  /// Return true if the current DIE is contained within an anonymous namespace.
>>  static bool isContainedInAnonNamespace(DIE *Die) {
>>    DIE *Parent = Die->getParent();
>> @@ -1090,7 +996,7 @@ static bool shouldAddODRHash(CompileUnit
>>    return CU->getLanguage() == dwarf::DW_LANG_C_plus_plus &&
>>           getDIEStringAttr(Die, dwarf::DW_AT_name) != "" &&
>>           !isContainedInAnonNamespace(Die);
>> - }
>> +}
>>
>>  void DwarfDebug::finalizeModuleInfo() {
>>    // Collect info for variables that were optimized out.
>> @@ -1111,12 +1017,16 @@ void DwarfDebug::finalizeModuleInfo() {
>>    // out type.
>>    // FIXME: Do type splitting.
>>    for (unsigned i = 0, e = TypeUnits.size(); i != e; ++i) {
>> -    MD5 Hash;
>>      DIE *Die = TypeUnits[i];
>> +    DIEHash Hash;
>>      // If we've requested ODR hashes and it's applicable for an ODR hash then
>>      // add the ODR signature now.
>> +    // FIXME: This should be added onto the type unit, not the type, but this
>> +    // works as an intermediate stage.
>>      if (GenerateODRHash && shouldAddODRHash(CUMap.begin()->second, Die))
>> -      addDIEODRSignature(Hash, CUMap.begin()->second, Die);
>> +      CUMap.begin()->second->addUInt(Die, dwarf::DW_AT_GNU_odr_signature,
>> +                                     dwarf::DW_FORM_data8,
>> +                                     Hash.computeDIEODRSignature(Die));
>>    }
>>
>>     // Compute DIE offsets and sizes.
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
>



More information about the llvm-commits mailing list