[llvm] 5070cec - [PDB] Bypass generic deserialization code for publics sorting
Reid Kleckner via llvm-commits
llvm-commits at lists.llvm.org
Sat May 2 18:14:57 PDT 2020
Author: Reid Kleckner
Date: 2020-05-02T18:14:50-07:00
New Revision: 5070cecd7219fc078cf0084d9dfa91e86b490bc2
URL: https://github.com/llvm/llvm-project/commit/5070cecd7219fc078cf0084d9dfa91e86b490bc2
DIFF: https://github.com/llvm/llvm-project/commit/5070cecd7219fc078cf0084d9dfa91e86b490bc2.diff
LOG: [PDB] Bypass generic deserialization code for publics sorting
The number of public symbols is very large, and each deserialization
does a few heap allocations. The public symbols are serialized by the
linker, so we can assume they have the expected layout and use it
directly.
Saves O(#publics) temporary heap allocations and shrinks some data
structures.
Added:
Modified:
llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h
llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h b/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h
index 1fcef9dd06c8..4383534b0db2 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h
@@ -350,6 +350,13 @@ class InlineSiteSym : public SymbolRecord {
uint32_t RecordOffset = 0;
};
+struct PublicSym32Header {
+ ulittle32_t Flags;
+ ulittle32_t Offset;
+ ulittle16_t Segment;
+ // char Name[];
+};
+
// S_PUB32
class PublicSym32 : public SymbolRecord {
public:
diff --git a/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
index 432f1e9b24d3..88eab574d0f7 100644
--- a/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
@@ -229,15 +229,30 @@ Error GSIStreamBuilder::finalizeMsfLayout() {
return Error::success();
}
-static bool comparePubSymByAddrAndName(
- const std::pair<const CVSymbol *, const PublicSym32 *> &LS,
- const std::pair<const CVSymbol *, const PublicSym32 *> &RS) {
- if (LS.second->Segment != RS.second->Segment)
- return LS.second->Segment < RS.second->Segment;
- if (LS.second->Offset != RS.second->Offset)
- return LS.second->Offset < RS.second->Offset;
-
- return LS.second->Name < RS.second->Name;
+static StringRef extractPubSym(const CVSymbol *Sym, uint16_t &Seg,
+ uint32_t &Offset) {
+ ArrayRef<uint8_t> Buf = Sym->content();
+ assert(Buf.size() > sizeof(PublicSym32Header));
+ const auto *Hdr = reinterpret_cast<const PublicSym32Header *>(Buf.data());
+ Buf = Buf.drop_front(sizeof(PublicSym32Header));
+ Seg = Hdr->Segment;
+ Offset = Hdr->Offset;
+ // Don't worry about finding the null terminator, since the strings will be
+ // compared later.
+ return StringRef(reinterpret_cast<const char *>(Buf.data()), Buf.size());
+}
+
+static bool comparePubSymByAddrAndName(const CVSymbol *LS, const CVSymbol *RS) {
+ uint16_t LSeg, RSeg;
+ uint32_t LOff, ROff;
+ StringRef LName, RName;
+ LName = extractPubSym(LS, LSeg, LOff);
+ RName = extractPubSym(RS, RSeg, ROff);
+ if (LSeg != RSeg)
+ return LSeg < RSeg;
+ if (LOff != ROff)
+ return LOff < ROff;
+ return LName < RName;
}
/// Compute the address map. The address map is an array of symbol offsets
@@ -246,19 +261,15 @@ static std::vector<ulittle32_t> computeAddrMap(ArrayRef<CVSymbol> Records) {
// Make a vector of pointers to the symbols so we can sort it by address.
// Also gather the symbol offsets while we're at it.
- std::vector<PublicSym32> DeserializedPublics;
- std::vector<std::pair<const CVSymbol *, const PublicSym32 *>> PublicsByAddr;
+ std::vector<const CVSymbol *> PublicsByAddr;
std::vector<uint32_t> SymOffsets;
- DeserializedPublics.reserve(Records.size());
PublicsByAddr.reserve(Records.size());
SymOffsets.reserve(Records.size());
uint32_t SymOffset = 0;
for (const CVSymbol &Sym : Records) {
assert(Sym.kind() == SymbolKind::S_PUB32);
- DeserializedPublics.push_back(
- cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(Sym)));
- PublicsByAddr.emplace_back(&Sym, &DeserializedPublics.back());
+ PublicsByAddr.push_back(&Sym);
SymOffsets.push_back(SymOffset);
SymOffset += Sym.length();
}
@@ -267,8 +278,8 @@ static std::vector<ulittle32_t> computeAddrMap(ArrayRef<CVSymbol> Records) {
// Fill in the symbol offsets in the appropriate order.
std::vector<ulittle32_t> AddrMap;
AddrMap.reserve(Records.size());
- for (auto &Sym : PublicsByAddr) {
- ptrdiff_t Idx = std::distance(Records.data(), Sym.first);
+ for (const CVSymbol *Sym : PublicsByAddr) {
+ ptrdiff_t Idx = std::distance(Records.data(), Sym);
assert(Idx >= 0 && size_t(Idx) < Records.size());
AddrMap.push_back(ulittle32_t(SymOffsets[Idx]));
}
More information about the llvm-commits
mailing list