[llvm] 5070cec - [PDB] Bypass generic deserialization code for publics sorting

Reid Kleckner via llvm-commits llvm-commits at lists.llvm.org
Sat May 2 18:14:57 PDT 2020


Author: Reid Kleckner
Date: 2020-05-02T18:14:50-07:00
New Revision: 5070cecd7219fc078cf0084d9dfa91e86b490bc2

URL: https://github.com/llvm/llvm-project/commit/5070cecd7219fc078cf0084d9dfa91e86b490bc2
DIFF: https://github.com/llvm/llvm-project/commit/5070cecd7219fc078cf0084d9dfa91e86b490bc2.diff

LOG: [PDB] Bypass generic deserialization code for publics sorting

The number of public symbols is very large, and each deserialization
does a few heap allocations. The public symbols are serialized by the
linker, so we can assume they have the expected layout and use it
directly.

Saves O(#publics) temporary heap allocations and shrinks some data
structures.

Added: 
    

Modified: 
    llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h
    llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h b/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h
index 1fcef9dd06c8..4383534b0db2 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h
@@ -350,6 +350,13 @@ class InlineSiteSym : public SymbolRecord {
   uint32_t RecordOffset = 0;
 };
 
+struct PublicSym32Header {
+  ulittle32_t Flags;
+  ulittle32_t Offset;
+  ulittle16_t Segment;
+  // char Name[];
+};
+
 // S_PUB32
 class PublicSym32 : public SymbolRecord {
 public:

diff --git a/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
index 432f1e9b24d3..88eab574d0f7 100644
--- a/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
@@ -229,15 +229,30 @@ Error GSIStreamBuilder::finalizeMsfLayout() {
   return Error::success();
 }
 
-static bool comparePubSymByAddrAndName(
-    const std::pair<const CVSymbol *, const PublicSym32 *> &LS,
-    const std::pair<const CVSymbol *, const PublicSym32 *> &RS) {
-  if (LS.second->Segment != RS.second->Segment)
-    return LS.second->Segment < RS.second->Segment;
-  if (LS.second->Offset != RS.second->Offset)
-    return LS.second->Offset < RS.second->Offset;
-
-  return LS.second->Name < RS.second->Name;
+static StringRef extractPubSym(const CVSymbol *Sym, uint16_t &Seg,
+                               uint32_t &Offset) {
+  ArrayRef<uint8_t> Buf = Sym->content();
+  assert(Buf.size() > sizeof(PublicSym32Header));
+  const auto *Hdr = reinterpret_cast<const PublicSym32Header *>(Buf.data());
+  Buf = Buf.drop_front(sizeof(PublicSym32Header));
+  Seg = Hdr->Segment;
+  Offset = Hdr->Offset;
+  // Don't worry about finding the null terminator, since the strings will be
+  // compared later.
+  return StringRef(reinterpret_cast<const char *>(Buf.data()), Buf.size());
+}
+
+static bool comparePubSymByAddrAndName(const CVSymbol *LS, const CVSymbol *RS) {
+  uint16_t LSeg, RSeg;
+  uint32_t LOff, ROff;
+  StringRef LName, RName;
+  LName = extractPubSym(LS, LSeg, LOff);
+  RName = extractPubSym(RS, RSeg, ROff);
+  if (LSeg != RSeg)
+    return LSeg < RSeg;
+  if (LOff != ROff)
+    return LOff < ROff;
+  return LName < RName;
 }
 
 /// Compute the address map. The address map is an array of symbol offsets
@@ -246,19 +261,15 @@ static std::vector<ulittle32_t> computeAddrMap(ArrayRef<CVSymbol> Records) {
   // Make a vector of pointers to the symbols so we can sort it by address.
   // Also gather the symbol offsets while we're at it.
 
-  std::vector<PublicSym32> DeserializedPublics;
-  std::vector<std::pair<const CVSymbol *, const PublicSym32 *>> PublicsByAddr;
+  std::vector<const CVSymbol *> PublicsByAddr;
   std::vector<uint32_t> SymOffsets;
-  DeserializedPublics.reserve(Records.size());
   PublicsByAddr.reserve(Records.size());
   SymOffsets.reserve(Records.size());
 
   uint32_t SymOffset = 0;
   for (const CVSymbol &Sym : Records) {
     assert(Sym.kind() == SymbolKind::S_PUB32);
-    DeserializedPublics.push_back(
-        cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(Sym)));
-    PublicsByAddr.emplace_back(&Sym, &DeserializedPublics.back());
+    PublicsByAddr.push_back(&Sym);
     SymOffsets.push_back(SymOffset);
     SymOffset += Sym.length();
   }
@@ -267,8 +278,8 @@ static std::vector<ulittle32_t> computeAddrMap(ArrayRef<CVSymbol> Records) {
   // Fill in the symbol offsets in the appropriate order.
   std::vector<ulittle32_t> AddrMap;
   AddrMap.reserve(Records.size());
-  for (auto &Sym : PublicsByAddr) {
-    ptrdiff_t Idx = std::distance(Records.data(), Sym.first);
+  for (const CVSymbol *Sym : PublicsByAddr) {
+    ptrdiff_t Idx = std::distance(Records.data(), Sym);
     assert(Idx >= 0 && size_t(Idx) < Records.size());
     AddrMap.push_back(ulittle32_t(SymOffsets[Idx]));
   }


        


More information about the llvm-commits mailing list