[llvm] r268268 - Parse PDB Name Hash Table

Zachary Turner via llvm-commits llvm-commits at lists.llvm.org
Mon May 2 11:09:15 PDT 2016


Author: zturner
Date: Mon May  2 13:09:14 2016
New Revision: 268268

URL: http://llvm.org/viewvc/llvm-project?rev=268268&view=rev
Log:
Parse PDB Name Hash Table

PDB has a lot of similar data structures.  We already have code
for parsing a Name Map, but PDB seems to have a different but
very similar structure that is a hash table.  This is the
beginning of code needed in order to parse the name hash table,
but it is not yet complete.  It parses the basic metadata of
the hash table, the bucket array, and the names buffer, but
doesn't use any of these fields yet as the data structure
requires a non-trivial amount of work to understand.

Added:
    llvm/trunk/include/llvm/DebugInfo/PDB/Raw/NameHashTable.h
    llvm/trunk/lib/DebugInfo/PDB/Raw/NameHashTable.cpp
Modified:
    llvm/trunk/include/llvm/DebugInfo/PDB/Raw/ByteStream.h
    llvm/trunk/include/llvm/DebugInfo/PDB/Raw/DbiStream.h
    llvm/trunk/include/llvm/DebugInfo/PDB/Raw/StreamReader.h
    llvm/trunk/lib/DebugInfo/PDB/CMakeLists.txt
    llvm/trunk/lib/DebugInfo/PDB/Raw/ByteStream.cpp
    llvm/trunk/lib/DebugInfo/PDB/Raw/DbiStream.cpp
    llvm/trunk/test/DebugInfo/PDB/pdbdump-headers.test
    llvm/trunk/tools/llvm-pdbdump/llvm-pdbdump.cpp

Modified: llvm/trunk/include/llvm/DebugInfo/PDB/Raw/ByteStream.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/DebugInfo/PDB/Raw/ByteStream.h?rev=268268&r1=268267&r2=268268&view=diff
==============================================================================
--- llvm/trunk/include/llvm/DebugInfo/PDB/Raw/ByteStream.h (original)
+++ llvm/trunk/include/llvm/DebugInfo/PDB/Raw/ByteStream.h Mon May  2 13:09:14 2016
@@ -38,6 +38,7 @@ public:
   uint32_t getLength() const override;
 
   ArrayRef<uint8_t> data() const { return Data; }
+  StringRef str() const;
 
 private:
   MutableArrayRef<uint8_t> Data;

Modified: llvm/trunk/include/llvm/DebugInfo/PDB/Raw/DbiStream.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/DebugInfo/PDB/Raw/DbiStream.h?rev=268268&r1=268267&r2=268268&view=diff
==============================================================================
--- llvm/trunk/include/llvm/DebugInfo/PDB/Raw/DbiStream.h (original)
+++ llvm/trunk/include/llvm/DebugInfo/PDB/Raw/DbiStream.h Mon May  2 13:09:14 2016
@@ -14,6 +14,7 @@
 #include "llvm/DebugInfo/PDB/Raw/ByteStream.h"
 #include "llvm/DebugInfo/PDB/Raw/MappedBlockStream.h"
 #include "llvm/DebugInfo/PDB/Raw/ModInfo.h"
+#include "llvm/DebugInfo/PDB/Raw/NameHashTable.h"
 #include "llvm/DebugInfo/PDB/Raw/RawConstants.h"
 #include "llvm/Support/Endian.h"
 
@@ -54,6 +55,7 @@ private:
   MappedBlockStream Stream;
 
   std::vector<ModuleInfoEx> ModuleInfos;
+  NameHashTable ECNames;
 
   ByteStream ModInfoSubstream;
   ByteStream SecContrSubstream;

Added: llvm/trunk/include/llvm/DebugInfo/PDB/Raw/NameHashTable.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/DebugInfo/PDB/Raw/NameHashTable.h?rev=268268&view=auto
==============================================================================
--- llvm/trunk/include/llvm/DebugInfo/PDB/Raw/NameHashTable.h (added)
+++ llvm/trunk/include/llvm/DebugInfo/PDB/Raw/NameHashTable.h Mon May  2 13:09:14 2016
@@ -0,0 +1,48 @@
+//===- NameHashTable.h - PDB Name Hash Table --------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_RAW_NAMEHASHTABLE_H
+#define LLVM_DEBUGINFO_PDB_RAW_NAMEHASHTABLE_H
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/PDB/Raw/ByteStream.h"
+
+#include <stdint.h>
+#include <utility>
+
+namespace llvm {
+namespace pdb {
+class StreamReader;
+class NameHashTable {
+public:
+  NameHashTable();
+  // Parses the table from Stream and returns an error code on failure.
+  std::error_code load(StreamReader &Stream);
+  // Header fields and trailing name count recorded by the last load().
+  uint32_t getNameCount() const { return NameCount; }
+  uint32_t getHashVersion() const { return HashVersion; }
+  uint32_t getSignature() const { return Signature; }
+  // Translate between a name's ID and its text in the names buffer.
+  StringRef getStringForID(uint32_t ID) const;
+  uint32_t getIDForString(StringRef Str) const;
+  // The bucket entries that follow the reserved first slot.
+  ArrayRef<uint32_t> name_ids() const;
+
+private:
+  ByteStream NamesBuffer;    // raw bytes of the names
+  std::vector<uint32_t> IDs; // bucket array; IDs[0] is the "invalid" ID
+  uint32_t Signature;
+  uint32_t HashVersion;
+  uint32_t NameCount;
+};
+}
+}
+
+#endif

Modified: llvm/trunk/include/llvm/DebugInfo/PDB/Raw/StreamReader.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/DebugInfo/PDB/Raw/StreamReader.h?rev=268268&r1=268267&r2=268268&view=diff
==============================================================================
--- llvm/trunk/include/llvm/DebugInfo/PDB/Raw/StreamReader.h (original)
+++ llvm/trunk/include/llvm/DebugInfo/PDB/Raw/StreamReader.h Mon May  2 13:09:14 2016
@@ -34,6 +34,11 @@ public:
     return readBytes(Buffer);
   }
 
+  template <typename T> std::error_code readArray(MutableArrayRef<T> Array) {
+    auto *Bytes = reinterpret_cast<uint8_t *>(Array.data()); // raw byte view
+    return readBytes(MutableArrayRef<uint8_t>(Bytes, Array.size() * sizeof(T)));
+  }
+
   void setOffset(uint32_t Off) { Offset = Off; }
   uint32_t getOffset() const { return Offset; }
   uint32_t getLength() const { return Stream.getLength(); }

Modified: llvm/trunk/lib/DebugInfo/PDB/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/DebugInfo/PDB/CMakeLists.txt?rev=268268&r1=268267&r2=268268&view=diff
==============================================================================
--- llvm/trunk/lib/DebugInfo/PDB/CMakeLists.txt (original)
+++ llvm/trunk/lib/DebugInfo/PDB/CMakeLists.txt Mon May  2 13:09:14 2016
@@ -33,6 +33,7 @@ add_pdb_impl_folder(Raw
   Raw/PDBFile.cpp
   Raw/DbiStream.cpp
   Raw/InfoStream.cpp
+  Raw/NameHashTable.cpp
   Raw/NameMap.cpp
   Raw/RawSession.cpp
   Raw/StreamReader.cpp)

Modified: llvm/trunk/lib/DebugInfo/PDB/Raw/ByteStream.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/DebugInfo/PDB/Raw/ByteStream.cpp?rev=268268&r1=268267&r2=268268&view=diff
==============================================================================
--- llvm/trunk/lib/DebugInfo/PDB/Raw/ByteStream.cpp (original)
+++ llvm/trunk/lib/DebugInfo/PDB/Raw/ByteStream.cpp Mon May  2 13:09:14 2016
@@ -59,3 +59,8 @@ std::error_code ByteStream::readBytes(ui
 }
 
 uint32_t ByteStream::getLength() const { return Data.size(); }
+
+StringRef ByteStream::str() const {
+  // Expose the stream's bytes as characters; no copy is made.
+  return StringRef(reinterpret_cast<const char *>(Data.data()), Data.size());
+}

Modified: llvm/trunk/lib/DebugInfo/PDB/Raw/DbiStream.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/DebugInfo/PDB/Raw/DbiStream.cpp?rev=268268&r1=268267&r2=268268&view=diff
==============================================================================
--- llvm/trunk/lib/DebugInfo/PDB/Raw/DbiStream.cpp (original)
+++ llvm/trunk/lib/DebugInfo/PDB/Raw/DbiStream.cpp Mon May  2 13:09:14 2016
@@ -10,6 +10,7 @@
 #include "llvm/DebugInfo/PDB/Raw/DbiStream.h"
 #include "llvm/DebugInfo/PDB/Raw/InfoStream.h"
 #include "llvm/DebugInfo/PDB/Raw/ModInfo.h"
+#include "llvm/DebugInfo/PDB/Raw/NameHashTable.h"
 #include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
 #include "llvm/DebugInfo/PDB/Raw/RawConstants.h"
 #include "llvm/DebugInfo/PDB/Raw/StreamReader.h"
@@ -148,6 +149,9 @@ std::error_code DbiStream::reload() {
   if (Reader.bytesRemaining() > 0)
     return std::make_error_code(std::errc::illegal_byte_sequence);
 
+  StreamReader ECReader(ECSubstream);
+  ECNames.load(ECReader);
+
   return std::error_code();
 }
 

Added: llvm/trunk/lib/DebugInfo/PDB/Raw/NameHashTable.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/DebugInfo/PDB/Raw/NameHashTable.cpp?rev=268268&view=auto
==============================================================================
--- llvm/trunk/lib/DebugInfo/PDB/Raw/NameHashTable.cpp (added)
+++ llvm/trunk/lib/DebugInfo/PDB/Raw/NameHashTable.cpp Mon May  2 13:09:14 2016
@@ -0,0 +1,141 @@
+//===- NameHashTable.cpp - PDB Name Hash Table ------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Raw/NameHashTable.h"
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/DebugInfo/PDB/Raw/ByteStream.h"
+#include "llvm/DebugInfo/PDB/Raw/StreamReader.h"
+#include "llvm/Support/Endian.h"
+
+using namespace llvm;
+using namespace llvm::support;
+using namespace llvm::pdb;
+
+typedef uint32_t *PUL; // NOTE(review): not referenced in this file - confirm
+typedef uint16_t *PUS; // NOTE(review): not referenced in this file - confirm
+
+static inline uint32_t HashStringV1(StringRef Str) {
+  // Version 1 hash: XOR the string together as little-endian 32-bit words,
+  // fold in a trailing 16-bit word and then an odd byte if present, set the
+  // 0x20 bit of every byte of the result, and mix the high bits downward.
+  const uint32_t Length = Str.size();
+  const uint32_t WordCount = Length / 4;
+  const char *Cursor = Str.data();
+
+  uint32_t Hash = 0;
+  for (uint32_t I = 0; I != WordCount; ++I, Cursor += 4)
+    Hash ^= *reinterpret_cast<const ulittle32_t *>(Cursor);
+
+  // Cursor now points just past the last whole word; at most 3 bytes are
+  // left.  Consume two of them as a 16-bit word when possible.
+  uint32_t Leftover = Length - WordCount * 4;
+  if (Leftover >= 2) {
+    Hash ^= *reinterpret_cast<const ulittle16_t *>(Cursor);
+    Cursor += 2;
+    Leftover -= 2;
+  }
+
+  // A single odd byte may still remain; it is hashed as an unsigned value.
+  if (Leftover == 1)
+    Hash ^= *reinterpret_cast<const uint8_t *>(Cursor);
+
+  const uint32_t ToLowerMask = 0x20202020;
+  Hash |= ToLowerMask;
+  Hash ^= (Hash >> 11);
+  Hash ^= (Hash >> 16);
+
+  return Hash;
+}
+
+static inline uint32_t HashStringV2(StringRef Str) {
+  // Version 2 hash: mix each little-endian 32-bit word, then each leftover
+  // byte (as an unsigned value), into the state; finish with one LCG step.
+  uint32_t State = 0xb170a1bf;
+  auto Mix = [&State](uint32_t Value) {
+    State += Value;
+    State += (State << 10);
+    State ^= (State >> 6);
+  };
+
+  const size_t WordBytes = sizeof(ulittle32_t);
+  const size_t WordCount = Str.size() / WordBytes;
+  const auto *Words = reinterpret_cast<const ulittle32_t *>(Str.data());
+  for (size_t I = 0; I != WordCount; ++I)
+    Mix(Words[I]);
+
+  // Fewer than four bytes remain once the whole words are consumed.
+  for (char C : Str.drop_front(WordCount * WordBytes))
+    Mix(static_cast<uint8_t>(C));
+
+  return State * 1664525L + 1013904223L;
+}
+// Builds an empty table: zero signature, hash version and name count.
+NameHashTable::NameHashTable() : Signature(0), HashVersion(0), NameCount(0) {}
+
+std::error_code NameHashTable::load(StreamReader &Stream) {
+  // Reads header, names, bucket count, buckets, name count; errors propagate.
+  struct Header {
+    support::ulittle32_t Signature;
+    support::ulittle32_t HashVersion;
+    support::ulittle32_t ByteSize;
+  };
+  Header H;
+  if (auto EC = Stream.readObject(&H))
+    return EC;
+  if (H.Signature != 0xEFFEEFFE)
+    return std::make_error_code(std::errc::illegal_byte_sequence);
+  if (H.HashVersion != 1 && H.HashVersion != 2)
+    return std::make_error_code(std::errc::not_supported);
+  Signature = H.Signature;
+  HashVersion = H.HashVersion;
+  if (auto EC = NamesBuffer.initialize(Stream, H.ByteSize))
+    return EC;
+  support::ulittle32_t HashCount;
+  if (auto EC = Stream.readObject(&HashCount))
+    return EC;
+  // The buckets and the name count after them must fit in what is left.
+  if (Stream.bytesRemaining() / sizeof(support::ulittle32_t) <= HashCount)
+    return std::make_error_code(std::errc::illegal_byte_sequence);
+  std::vector<support::ulittle32_t> BucketArray(HashCount);
+  if (auto EC = Stream.readArray<support::ulittle32_t>(BucketArray))
+    return EC;
+  IDs.assign(BucketArray.begin(), BucketArray.end());
+  return Stream.readInteger(NameCount);
+}
+
+StringRef NameHashTable::getStringForID(uint32_t ID) const {
+  // No table, the "invalid" ID (IDs[0]), or an offset outside the buffer.
+  if (IDs.empty() || ID == IDs[0] || ID >= NamesBuffer.getLength())
+    return StringRef();
+  return NamesBuffer.str().substr(ID).split('\0').first; // stop at the NUL
+}
+
+uint32_t NameHashTable::getIDForString(StringRef Str) const {
+  // With no buckets there is nothing to probe and no IDs[0]; report ID 0.
+  size_t Count = IDs.size();
+  if (Count == 0)
+    return 0;
+  uint32_t Hash = (HashVersion == 1) ? HashStringV1(Str) : HashStringV2(Str);
+  uint32_t Start = Hash % Count;
+  for (size_t I = 0; I < Count; ++I) {
+    // The hash is only a starting point: probe linearly, wrapping around,
+    // so every bucket is examined before giving up.
+    uint32_t ID = IDs[(Start + I) % Count];
+    if (getStringForID(ID) == Str)
+      return ID;
+  }
+  // IDs[0] contains the ID of the "invalid" entry.
+  return IDs[0];
+}
+
+ArrayRef<uint32_t> NameHashTable::name_ids() const {
+  size_t N = IDs.empty() ? 0 : IDs.size() - 1; // buckets after IDs[0]
+  return makeArrayRef(IDs).slice(IDs.size() - N, NameCount < N ? NameCount : N);
+}

Modified: llvm/trunk/test/DebugInfo/PDB/pdbdump-headers.test
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/DebugInfo/PDB/pdbdump-headers.test?rev=268268&r1=268267&r2=268268&view=diff
==============================================================================
--- llvm/trunk/test/DebugInfo/PDB/pdbdump-headers.test (original)
+++ llvm/trunk/test/DebugInfo/PDB/pdbdump-headers.test Mon May  2 13:09:14 2016
@@ -20,6 +20,11 @@
 ; EMPTY:      NameStream: 13
 ; EMPTY-NEXT: NameStreamSignature: effeeffe
 ; EMPTY-NEXT: NameStreamVersion: 1
+; EMPTY-NEXT: Name Count: 4
+; EMPTY-NEXT: Name: d:\src\llvm\test\debuginfo\pdb\inputs\predefined c++ attributes (compiler internal)
+; EMPTY-NEXT: Name:
+; EMPTY-NEXT: Name: d:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp
+; EMPTY-NEXT: Name:
 
 ; EMPTY:      Dbi Version: 19990903
 ; EMPTY-NEXT: Age: 1
@@ -76,6 +81,16 @@ BIG-NEXT: Guid: {880ECC89-DF81-0B4F-839C
 BIG:      NameStream: 13
 BIG-NEXT: NameStreamSignature: effeeffe
 BIG-NEXT: NameStreamVersion: 1
+BIG-NEXT: Name Count: 92
+BIG-NEXT: Name: f:\dd\vctools\crt\vcruntime\inc\vcruntime_startup.h
+BIG-NEXT: Name: f:\dd\vctools\crt\vcstartup\src\misc\checkcfg.c
+BIG-NEXT: Name: f:\dd\vctools\langapi\include\isa_availability.h
+BIG-NEXT: Name:
+BIG-NEXT: Name: $T0 $ebp = $T2 $esp = $T1 .raSearchStart = $eip $T1 ^ = $ebp $T0 = $esp $T1 4 + =
+BIG-NEXT: Name:
+BIG-NEXT: Name: f:\dd\vctools\crt\vcstartup\src\startup\exe_common.inl
+BIG-NEXT: Name: $T0 $ebp = $eip $T0 4 + ^ = $ebp $T0 ^ = $esp $T0 8 + =  $20 $T0 40 - ^ =  $23 $T0 44 - ^ =  $24 $T0 48 - ^ =
+BIG-NEXT: Name: $T0 $ebp = $eip $T0 4 + ^ = $ebp $T0 ^ = $esp $T0 8 + =  $23 $T0 16 - ^ =  $24 $T0 20 - ^ =
 
 BIG:      Dbi Version: 19990903
 BIG-NEXT: Age: 1

Modified: llvm/trunk/tools/llvm-pdbdump/llvm-pdbdump.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-pdbdump/llvm-pdbdump.cpp?rev=268268&r1=268267&r2=268268&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-pdbdump/llvm-pdbdump.cpp (original)
+++ llvm/trunk/tools/llvm-pdbdump/llvm-pdbdump.cpp Mon May  2 13:09:14 2016
@@ -39,6 +39,7 @@
 #include "llvm/DebugInfo/PDB/Raw/InfoStream.h"
 #include "llvm/DebugInfo/PDB/Raw/MappedBlockStream.h"
 #include "llvm/DebugInfo/PDB/Raw/ModInfo.h"
+#include "llvm/DebugInfo/PDB/Raw/NameHashTable.h"
 #include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
 #include "llvm/DebugInfo/PDB/Raw/RawSession.h"
 #include "llvm/DebugInfo/PDB/Raw/StreamReader.h"
@@ -147,19 +148,6 @@ cl::opt<bool> NoEnumDefs("no-enum-defini
                          cl::cat(FilterCategory));
 }
 
-
-static void reportError(StringRef Input, StringRef Message) {
-  if (Input == "-")
-    Input = "<stdin>";
-  errs() << Input << ": " << Message << "\n";
-  errs().flush();
-  exit(1);
-}
-
-static void reportError(StringRef Input, std::error_code EC) {
-  reportError(Input, EC.message());
-}
-
 static void dumpStructure(RawSession &RS) {
   PDBFile &File = RS.getPDBFile();
 
@@ -253,19 +241,15 @@ static void dumpStructure(RawSession &RS
 
     outs() << "NameStream: " << NameStreamIndex << '\n';
 
-    // The name stream appears to start with a signature and version.
-    uint32_t NameStreamSignature;
-    Reader.readInteger(NameStreamSignature);
+    NameHashTable NameTable;
+    NameTable.load(Reader);
     outs() << "NameStreamSignature: ";
-    outs().write_hex(NameStreamSignature) << '\n';
-
-    uint32_t NameStreamVersion;
-    Reader.readInteger(NameStreamVersion);
-    outs() << "NameStreamVersion: " << NameStreamVersion << '\n';
-
-    // We only support this particular version of the name stream.
-    if (NameStreamSignature != 0xeffeeffe || NameStreamVersion != 1)
-      reportError("", std::make_error_code(std::errc::not_supported));
+    outs().write_hex(NameTable.getSignature()) << '\n';
+    outs() << "NameStreamVersion: " << NameTable.getHashVersion() << '\n';
+    outs() << "Name Count: " << NameTable.getNameCount() << '\n';
+    for (uint32_t ID : NameTable.name_ids()) {
+      outs() << "Name: " << NameTable.getStringForID(ID) << '\n';
+    }
   }
 
   DbiStream &DS = File.getPDBDbiStream();




More information about the llvm-commits mailing list