[llvm] r309303 - [PDB] Write public symbol records and the publics hash table

Reid Kleckner via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 27 11:25:59 PDT 2017


Author: rnk
Date: Thu Jul 27 11:25:59 2017
New Revision: 309303

URL: http://llvm.org/viewvc/llvm-project?rev=309303&view=rev
Log:
[PDB] Write public symbol records and the publics hash table

Summary:
MSVC link.exe records all external symbol names in the publics stream.
It provides similar functionality to an ELF .symtab.

Reviewers: zturner, ruiu

Subscribers: hiraditya, llvm-commits

Differential Revision: https://reviews.llvm.org/D35871

Modified:
    llvm/trunk/include/llvm/DebugInfo/CodeView/SymbolRecord.h
    llvm/trunk/include/llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h
    llvm/trunk/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
    llvm/trunk/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
    llvm/trunk/lib/DebugInfo/PDB/Native/PublicsStreamBuilder.cpp

Modified: llvm/trunk/include/llvm/DebugInfo/CodeView/SymbolRecord.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/DebugInfo/CodeView/SymbolRecord.h?rev=309303&r1=309302&r2=309303&view=diff
==============================================================================
--- llvm/trunk/include/llvm/DebugInfo/CodeView/SymbolRecord.h (original)
+++ llvm/trunk/include/llvm/DebugInfo/CodeView/SymbolRecord.h Thu Jul 27 11:25:59 2017
@@ -363,12 +363,12 @@ public:
       : SymbolRecord(SymbolRecordKind::PublicSym32),
         RecordOffset(RecordOffset) {}
 
-  PublicSymFlags Flags;
-  uint32_t Offset;
-  uint16_t Segment;
+  PublicSymFlags Flags = PublicSymFlags::None;
+  uint32_t Offset = 0;
+  uint16_t Segment = 0;
   StringRef Name;
 
-  uint32_t RecordOffset;
+  uint32_t RecordOffset = 0;
 };
 
 // S_REGISTER

Modified: llvm/trunk/include/llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h?rev=309303&r1=309302&r2=309303&view=diff
==============================================================================
--- llvm/trunk/include/llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h (original)
+++ llvm/trunk/include/llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h Thu Jul 27 11:25:59 2017
@@ -10,15 +10,28 @@
 #ifndef LLVM_DEBUGINFO_PDB_RAW_PDBPUBLICSTREAMBUILDER_H
 #define LLVM_DEBUGINFO_PDB_RAW_PDBPUBLICSTREAMBUILDER_H
 
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
 #include "llvm/DebugInfo/PDB/Native/RawConstants.h"
 #include "llvm/DebugInfo/PDB/Native/RawTypes.h"
+#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
 #include "llvm/Support/BinaryByteStream.h"
+#include "llvm/Support/BinaryItemStream.h"
 #include "llvm/Support/BinaryStreamRef.h"
 #include "llvm/Support/BinaryStreamWriter.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
 
 namespace llvm {
+
+template <> struct BinaryItemTraits<codeview::CVSymbol> {
+  static size_t length(const codeview::CVSymbol &Item) {
+    return Item.RecordData.size();
+  }
+  static ArrayRef<uint8_t> bytes(const codeview::CVSymbol &Item) {
+    return Item.RecordData;
+  }
+};
+
 namespace msf {
 class MSFBuilder;
 }
@@ -26,6 +39,14 @@ namespace pdb {
 class PublicsStream;
 struct PublicsStreamHeader;
 
+struct GSIHashTableBuilder {
+  void addSymbols(ArrayRef<codeview::CVSymbol> Symbols);
+
+  std::vector<PSHashRecord> HashRecords;
+  std::array<support::ulittle32_t, (IPHR_HASH + 32) / 32> HashBitmap;
+  std::vector<support::ulittle32_t> HashBuckets;
+};
+
 class PublicsStreamBuilder {
 public:
   explicit PublicsStreamBuilder(msf::MSFBuilder &Msf);
@@ -37,15 +58,19 @@ public:
   Error finalizeMsfLayout();
   uint32_t calculateSerializedLength() const;
 
-  Error commit(BinaryStreamWriter &PublicsWriter);
+  Error commit(BinaryStreamWriter &PublicsWriter,
+               BinaryStreamWriter &RecWriter);
 
   uint32_t getStreamIndex() const { return StreamIdx; }
   uint32_t getRecordStreamIdx() const { return RecordStreamIdx; }
 
+  void addPublicSymbol(const codeview::PublicSym32 &Pub);
+
 private:
   uint32_t StreamIdx = kInvalidStreamIndex;
   uint32_t RecordStreamIdx = kInvalidStreamIndex;
-  std::vector<PSHashRecord> HashRecords;
+  std::unique_ptr<GSIHashTableBuilder> Table;
+  std::vector<codeview::CVSymbol> Publics;
   msf::MSFBuilder &Msf;
 };
 } // namespace pdb

Modified: llvm/trunk/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp?rev=309303&r1=309302&r2=309303&view=diff
==============================================================================
--- llvm/trunk/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp (original)
+++ llvm/trunk/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp Thu Jul 27 11:25:59 2017
@@ -16,6 +16,7 @@
 #include "llvm/DebugInfo/MSF/MSFCommon.h"
 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
 #include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
+#include "llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h"
 #include "llvm/DebugInfo/PDB/Native/RawConstants.h"
 #include "llvm/DebugInfo/PDB/Native/RawError.h"
 #include "llvm/Support/BinaryItemStream.h"
@@ -26,16 +27,6 @@ using namespace llvm::codeview;
 using namespace llvm::msf;
 using namespace llvm::pdb;
 
-namespace llvm {
-template <> struct BinaryItemTraits<CVSymbol> {
-  static size_t length(const CVSymbol &Item) { return Item.RecordData.size(); }
-
-  static ArrayRef<uint8_t> bytes(const CVSymbol &Item) {
-    return Item.RecordData;
-  }
-};
-}
-
 static uint32_t calculateDiSymbolStreamSize(uint32_t SymbolByteSize,
                                             uint32_t C13Size) {
   uint32_t Size = sizeof(uint32_t);   // Signature

Modified: llvm/trunk/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp?rev=309303&r1=309302&r2=309303&view=diff
==============================================================================
--- llvm/trunk/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp (original)
+++ llvm/trunk/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp Thu Jul 27 11:25:59 2017
@@ -212,8 +212,11 @@ Error PDBFileBuilder::commit(StringRef F
   if (Publics) {
     auto PS = WritableMappedBlockStream::createIndexedStream(
         Layout, Buffer, Publics->getStreamIndex(), Allocator);
+    auto PRS = WritableMappedBlockStream::createIndexedStream(
+        Layout, Buffer, Publics->getRecordStreamIdx(), Allocator);
     BinaryStreamWriter PSWriter(*PS);
-    if (auto EC = Publics->commit(PSWriter))
+    BinaryStreamWriter RecWriter(*PRS);
+    if (auto EC = Publics->commit(PSWriter, RecWriter))
       return EC;
   }
 

Modified: llvm/trunk/lib/DebugInfo/PDB/Native/PublicsStreamBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/DebugInfo/PDB/Native/PublicsStreamBuilder.cpp?rev=309303&r1=309302&r2=309303&view=diff
==============================================================================
--- llvm/trunk/lib/DebugInfo/PDB/Native/PublicsStreamBuilder.cpp (original)
+++ llvm/trunk/lib/DebugInfo/PDB/Native/PublicsStreamBuilder.cpp Thu Jul 27 11:25:59 2017
@@ -8,16 +8,25 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/CodeView/SymbolSerializer.h"
 #include "llvm/DebugInfo/MSF/MSFBuilder.h"
 #include "llvm/DebugInfo/MSF/MSFCommon.h"
 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
 #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
+#include "llvm/DebugInfo/PDB/Native/Hash.h"
+#include "llvm/Support/BinaryItemStream.h"
+#include "llvm/Support/BinaryStreamWriter.h"
+#include <algorithm>
+#include <vector>
 
 using namespace llvm;
 using namespace llvm::msf;
 using namespace llvm::pdb;
+using namespace llvm::codeview;
 
-PublicsStreamBuilder::PublicsStreamBuilder(msf::MSFBuilder &Msf) : Msf(Msf) {}
+PublicsStreamBuilder::PublicsStreamBuilder(msf::MSFBuilder &Msf)
+    : Table(new GSIHashTableBuilder), Msf(Msf) {}
 
 PublicsStreamBuilder::~PublicsStreamBuilder() {}
 
@@ -25,63 +34,187 @@ uint32_t PublicsStreamBuilder::calculate
   uint32_t Size = 0;
   Size += sizeof(PublicsStreamHeader);
   Size += sizeof(GSIHashHeader);
-  Size += HashRecords.size() * sizeof(PSHashRecord);
-  size_t BitmapSizeInBits = alignTo(IPHR_HASH + 1, 32);
-  uint32_t NumBitmapEntries = BitmapSizeInBits / 8;
-  Size += NumBitmapEntries;
-
-  // FIXME: Account for hash buckets.  For now since we we write a zero-bitmap
-  // indicating that no hash buckets are valid, we also write zero byets of hash
-  // bucket data.
-  Size += 0;
+  Size += Table->HashRecords.size() * sizeof(PSHashRecord);
+  Size += Table->HashBitmap.size() * sizeof(uint32_t);
+  Size += Table->HashBuckets.size() * sizeof(uint32_t);
+
+  Size += Publics.size() * sizeof(uint32_t); // AddrMap
+
+  // FIXME: Add thunk map and section offsets for incremental linking.
+
   return Size;
 }
 
 Error PublicsStreamBuilder::finalizeMsfLayout() {
+  Table->addSymbols(Publics);
+
   Expected<uint32_t> Idx = Msf.addStream(calculateSerializedLength());
   if (!Idx)
     return Idx.takeError();
   StreamIdx = *Idx;
 
-  Expected<uint32_t> RecordIdx = Msf.addStream(0);
+  uint32_t PublicRecordBytes = 0;
+  for (auto &Pub : Publics)
+    PublicRecordBytes += Pub.length();
+
+  Expected<uint32_t> RecordIdx = Msf.addStream(PublicRecordBytes);
   if (!RecordIdx)
     return RecordIdx.takeError();
   RecordStreamIdx = *RecordIdx;
   return Error::success();
 }
 
-Error PublicsStreamBuilder::commit(BinaryStreamWriter &PublicsWriter) {
+void PublicsStreamBuilder::addPublicSymbol(const PublicSym32 &Pub) {
+  Publics.push_back(SymbolSerializer::writeOneSymbol(
+      const_cast<PublicSym32 &>(Pub), Msf.getAllocator(),
+      CodeViewContainer::Pdb));
+}
+
+// FIXME: Put this back in the header.
+struct PubSymLayout {
+  ulittle16_t reclen;
+  ulittle16_t reckind;
+  ulittle32_t flags;
+  ulittle32_t off;
+  ulittle16_t seg;
+  char name[1];
+};
+
+bool comparePubSymByAddrAndName(const CVSymbol *LS, const CVSymbol *RS) {
+  assert(LS->length() > sizeof(PubSymLayout) &&
+         RS->length() > sizeof(PubSymLayout));
+  auto *L = reinterpret_cast<const PubSymLayout *>(LS->data().data());
+  auto *R = reinterpret_cast<const PubSymLayout *>(RS->data().data());
+  if (L->seg < R->seg)
+    return true;
+  if (L->seg > R->seg)
+    return false;
+  if (L->off < R->off)
+    return true;
+  if (L->off > R->off)
+    return false;
+  return strcmp(L->name, R->name) < 0;
+}
+
+static StringRef getSymbolName(const CVSymbol &Sym) {
+  assert(Sym.kind() == S_PUB32 && "handle other kinds");
+  ArrayRef<uint8_t> NameBytes =
+      Sym.data().drop_front(offsetof(PubSymLayout, name));
+  return StringRef(reinterpret_cast<const char *>(NameBytes.data()),
+                   NameBytes.size())
+      .trim('\0');
+}
+
+/// Compute the address map. The address map is an array of symbol offsets
+/// sorted so that it can be binary searched by address.
+static std::vector<ulittle32_t> computeAddrMap(ArrayRef<CVSymbol> Publics) {
+  // Make a vector of pointers to the symbols so we can sort it by address.
+  // Also gather the symbol offsets while we're at it.
+  std::vector<const CVSymbol *> PublicsByAddr;
+  std::vector<uint32_t> SymOffsets;
+  PublicsByAddr.reserve(Publics.size());
+  uint32_t SymOffset = 0;
+  for (const CVSymbol &Sym : Publics) {
+    PublicsByAddr.push_back(&Sym);
+    SymOffsets.push_back(SymOffset);
+    SymOffset += Sym.length();
+  }
+  std::stable_sort(PublicsByAddr.begin(), PublicsByAddr.end(),
+                   comparePubSymByAddrAndName);
+
+  // Fill in the symbol offsets in the appropriate order.
+  std::vector<ulittle32_t> AddrMap;
+  AddrMap.reserve(Publics.size());
+  for (const CVSymbol *Sym : PublicsByAddr) {
+    ptrdiff_t Idx = std::distance(Publics.data(), Sym);
+    assert(Idx >= 0 && size_t(Idx) < Publics.size());
+    AddrMap.push_back(ulittle32_t(SymOffsets[Idx]));
+  }
+  return AddrMap;
+}
+
+Error PublicsStreamBuilder::commit(BinaryStreamWriter &PublicsWriter,
+                                   BinaryStreamWriter &RecWriter) {
+  assert(Table->HashRecords.size() == Publics.size());
+
   PublicsStreamHeader PSH;
   GSIHashHeader GSH;
 
-  // FIXME: Figure out what to put for these values.
-  PSH.AddrMap = 0;
-  PSH.ISectThunkTable = 0;
-  PSH.NumSections = 0;
+  PSH.AddrMap = Publics.size() * 4;
+
+  // FIXME: Fill these in. They are for incremental linking.
   PSH.NumThunks = 0;
-  PSH.OffThunkTable = 0;
   PSH.SizeOfThunk = 0;
-  PSH.SymHash = 0;
+  PSH.ISectThunkTable = 0;
+  PSH.OffThunkTable = 0;
+  PSH.NumSections = 0;
 
   GSH.VerSignature = GSIHashHeader::HdrSignature;
   GSH.VerHdr = GSIHashHeader::HdrVersion;
-  GSH.HrSize = 0;
-  GSH.NumBuckets = 0;
+  GSH.HrSize = Table->HashRecords.size() * sizeof(PSHashRecord);
+  GSH.NumBuckets = Table->HashBitmap.size() * 4 + Table->HashBuckets.size() * 4;
+
+  PSH.SymHash = sizeof(GSH) + GSH.HrSize + GSH.NumBuckets;
 
   if (auto EC = PublicsWriter.writeObject(PSH))
     return EC;
   if (auto EC = PublicsWriter.writeObject(GSH))
     return EC;
-  if (auto EC = PublicsWriter.writeArray(makeArrayRef(HashRecords)))
+
+  if (auto EC = PublicsWriter.writeArray(makeArrayRef(Table->HashRecords)))
+    return EC;
+  if (auto EC = PublicsWriter.writeArray(makeArrayRef(Table->HashBitmap)))
+    return EC;
+  if (auto EC = PublicsWriter.writeArray(makeArrayRef(Table->HashBuckets)))
     return EC;
 
-  size_t BitmapSizeInBits = alignTo(IPHR_HASH + 1, 32);
-  uint32_t NumBitmapEntries = BitmapSizeInBits / 8;
-  std::vector<uint8_t> BitmapData(NumBitmapEntries);
-  // FIXME: Build an actual bitmap
-  if (auto EC = PublicsWriter.writeBytes(makeArrayRef(BitmapData)))
+  std::vector<ulittle32_t> AddrMap = computeAddrMap(Publics);
+  if (auto EC = PublicsWriter.writeArray(makeArrayRef(AddrMap)))
+    return EC;
+
+  BinaryItemStream<CVSymbol> Records(support::endianness::little);
+  Records.setItems(Publics);
+  BinaryStreamRef RecordsRef(Records);
+  if (auto EC = RecWriter.writeStreamRef(RecordsRef))
     return EC;
 
-  // FIXME: Write actual hash buckets.
   return Error::success();
 }
+
+void GSIHashTableBuilder::addSymbols(ArrayRef<CVSymbol> Symbols) {
+  std::array<std::vector<PSHashRecord>, IPHR_HASH + 1> TmpBuckets;
+  uint32_t SymOffset = 0;
+  for (const CVSymbol &Sym : Symbols) {
+    PSHashRecord HR;
+    // Add one when writing symbol offsets to disk. See GSI1::fixSymRecs.
+    HR.Off = SymOffset + 1;
+    HR.CRef = 1; // Always use a refcount of 1.
+
+    // Hash the name to figure out which bucket this goes into.
+    StringRef Name = getSymbolName(Sym);
+    size_t BucketIdx = hashStringV1(Name) % IPHR_HASH;
+    TmpBuckets[BucketIdx].push_back(HR); // FIXME: Does order matter?
+
+    SymOffset += Sym.length();
+  }
+
+  // Compute the three tables: the hash records in bucket and chain order, the
+  // bucket presence bitmap, and the bucket chain start offsets.
+  HashRecords.reserve(Symbols.size());
+  for (size_t BucketIdx = 0; BucketIdx < IPHR_HASH + 1; ++BucketIdx) {
+    auto &Bucket = TmpBuckets[BucketIdx];
+    if (Bucket.empty())
+      continue;
+    HashBitmap[BucketIdx / 32] |= 1U << (BucketIdx % 32);
+
+    // Calculate what the offset of the first hash record in the chain would be
+    // if it were inflated to contain 32-bit pointers. On a 32-bit system, each
+    // record would be 12 bytes. See HROffsetCalc in gsi.h.
+    const int SizeOfHROffsetCalc = 12;
+    ulittle32_t ChainStartOff =
+        ulittle32_t(HashRecords.size() * SizeOfHROffsetCalc);
+    HashBuckets.push_back(ChainStartOff);
+    for (const auto &HR : Bucket)
+      HashRecords.push_back(HR);
+  }
+}




More information about the llvm-commits mailing list