[lld] 6529d7c - [PDB] Defer relocating .debug$S until commit time and parallelize it

Reid Kleckner via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 12 17:47:40 PST 2021


Author: Reid Kleckner
Date: 2021-01-12T17:46:29-08:00
New Revision: 6529d7c5a45b1b9588e512013b02f891d71bc134

URL: https://github.com/llvm/llvm-project/commit/6529d7c5a45b1b9588e512013b02f891d71bc134
DIFF: https://github.com/llvm/llvm-project/commit/6529d7c5a45b1b9588e512013b02f891d71bc134.diff

LOG: [PDB] Defer relocating .debug$S until commit time and parallelize it

This is a pretty classic optimization. Instead of processing symbol
records and copying them to temporary storage, do a first pass to
measure how large the module symbol stream will be, and then copy the
data into place in the PDB file. This requires defering relocation until
much later, which accounts for most of the complexity in this patch.

This patch avoids copying the contents of all live .debug$S sections
into heap memory, which is worth about 20% of private memory usage when
making PDBs. However, this is not an unmitigated performance win,
because it can be faster to read dense, temporary, heap data than it is
to iterate symbol records in object file backed memory a second time.

Results on release chrome.dll:
peak mem: 5164.89MB -> 4072.19MB (-1,092.7MB, -21.2%)
wall-j1:  0m30.844s -> 0m32.094s (slightly slower)
wall-j3:  0m20.968s -> 0m20.312s (slightly faster)
wall-j8:  0m19.062s -> 0m17.672s (meaningfully faster)

I gathered similar numbers for a debug, component build of content.dll
in Chrome, and the performance impact of this change was in the noise.
The memory usage reduction was visible and similar.

Because of the new parallelism in the PDB commit phase, more cores makes
the new approach faster. I'm assuming that most C++ developer machines
these days are at least quad core, so I think this is a win.

Differential Revision: https://reviews.llvm.org/D94267

Added: 
    

Modified: 
    lld/COFF/Chunks.cpp
    lld/COFF/Chunks.h
    lld/COFF/PDB.cpp
    llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h
    llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
    llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp

Removed: 
    


################################################################################
diff  --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp
index e04ceed505c2..f2cdc24b7bf1 100644
--- a/lld/COFF/Chunks.cpp
+++ b/lld/COFF/Chunks.cpp
@@ -369,47 +369,88 @@ void SectionChunk::writeTo(uint8_t *buf) const {
       continue;
     }
 
-    uint8_t *off = buf + rel.VirtualAddress;
+    applyRelocation(buf + rel.VirtualAddress, rel);
+  }
+}
 
-    auto *sym =
-        dyn_cast_or_null<Defined>(file->getSymbol(rel.SymbolTableIndex));
+void SectionChunk::applyRelocation(uint8_t *off,
+                                   const coff_relocation &rel) const {
+  auto *sym = dyn_cast_or_null<Defined>(file->getSymbol(rel.SymbolTableIndex));
 
-    // Get the output section of the symbol for this relocation.  The output
-    // section is needed to compute SECREL and SECTION relocations used in debug
-    // info.
-    Chunk *c = sym ? sym->getChunk() : nullptr;
-    OutputSection *os = c ? c->getOutputSection() : nullptr;
-
-    // Skip the relocation if it refers to a discarded section, and diagnose it
-    // as an error if appropriate. If a symbol was discarded early, it may be
-    // null. If it was discarded late, the output section will be null, unless
-    // it was an absolute or synthetic symbol.
-    if (!sym ||
-        (!os && !isa<DefinedAbsolute>(sym) && !isa<DefinedSynthetic>(sym))) {
-      maybeReportRelocationToDiscarded(this, sym, rel);
-      continue;
-    }
+  // Get the output section of the symbol for this relocation.  The output
+  // section is needed to compute SECREL and SECTION relocations used in debug
+  // info.
+  Chunk *c = sym ? sym->getChunk() : nullptr;
+  OutputSection *os = c ? c->getOutputSection() : nullptr;
 
-    uint64_t s = sym->getRVA();
+  // Skip the relocation if it refers to a discarded section, and diagnose it
+  // as an error if appropriate. If a symbol was discarded early, it may be
+  // null. If it was discarded late, the output section will be null, unless
+  // it was an absolute or synthetic symbol.
+  if (!sym ||
+      (!os && !isa<DefinedAbsolute>(sym) && !isa<DefinedSynthetic>(sym))) {
+    maybeReportRelocationToDiscarded(this, sym, rel);
+    return;
+  }
 
-    // Compute the RVA of the relocation for relative relocations.
-    uint64_t p = rva + rel.VirtualAddress;
-    switch (config->machine) {
-    case AMD64:
-      applyRelX64(off, rel.Type, os, s, p);
-      break;
-    case I386:
-      applyRelX86(off, rel.Type, os, s, p);
-      break;
-    case ARMNT:
-      applyRelARM(off, rel.Type, os, s, p);
-      break;
-    case ARM64:
-      applyRelARM64(off, rel.Type, os, s, p);
+  uint64_t s = sym->getRVA();
+
+  // Compute the RVA of the relocation for relative relocations.
+  uint64_t p = rva + rel.VirtualAddress;
+  switch (config->machine) {
+  case AMD64:
+    applyRelX64(off, rel.Type, os, s, p);
+    break;
+  case I386:
+    applyRelX86(off, rel.Type, os, s, p);
+    break;
+  case ARMNT:
+    applyRelARM(off, rel.Type, os, s, p);
+    break;
+  case ARM64:
+    applyRelARM64(off, rel.Type, os, s, p);
+    break;
+  default:
+    llvm_unreachable("unknown machine type");
+  }
+}
+
+// Defend against unsorted relocations. This may be overly conservative.
+void SectionChunk::sortRelocations() {
+  auto cmpByVa = [](const coff_relocation &l, const coff_relocation &r) {
+    return l.VirtualAddress < r.VirtualAddress;
+  };
+  if (llvm::is_sorted(getRelocs(), cmpByVa))
+    return;
+  warn("some relocations in " + file->getName() + " are not sorted");
+  MutableArrayRef<coff_relocation> newRelocs(
+      bAlloc.Allocate<coff_relocation>(relocsSize), relocsSize);
+  memcpy(newRelocs.data(), relocsData, relocsSize * sizeof(coff_relocation));
+  llvm::sort(newRelocs, cmpByVa);
+  setRelocs(newRelocs);
+}
+
+// Similar to writeTo, but suitable for relocating a subsection of the overall
+// section.
+void SectionChunk::writeAndRelocateSubsection(ArrayRef<uint8_t> sec,
+                                              ArrayRef<uint8_t> subsec,
+                                              uint32_t &nextRelocIndex,
+                                              uint8_t *buf) const {
+  assert(!subsec.empty() && !sec.empty());
+  assert(sec.begin() <= subsec.begin() && subsec.end() <= sec.end() &&
+         "subsection is not part of this section");
+  size_t vaBegin = std::distance(sec.begin(), subsec.begin());
+  size_t vaEnd = std::distance(sec.begin(), subsec.end());
+  memcpy(buf, subsec.data(), subsec.size());
+  for (; nextRelocIndex < relocsSize; ++nextRelocIndex) {
+    const coff_relocation &rel = relocsData[nextRelocIndex];
+    // Skip relocations applied before this subsection.
+    if (rel.VirtualAddress < vaBegin)
+      continue;
+    // Stop if the relocation does not apply to this subsection.
+    if (rel.VirtualAddress >= vaEnd)
       break;
-    default:
-      llvm_unreachable("unknown machine type");
-    }
+    applyRelocation(&buf[rel.VirtualAddress - vaBegin], rel);
   }
 }
 

diff  --git a/lld/COFF/Chunks.h b/lld/COFF/Chunks.h
index 0528143383c5..e076d8e71109 100644
--- a/lld/COFF/Chunks.h
+++ b/lld/COFF/Chunks.h
@@ -204,6 +204,15 @@ class SectionChunk final : public Chunk {
   ArrayRef<uint8_t> getContents() const;
   void writeTo(uint8_t *buf) const;
 
+  // Defend against unsorted relocations. This may be overly conservative.
+  void sortRelocations();
+
+  // Write and relocate a portion of the section. This is intended to be called
+  // in a loop. Relocations must be sorted first.
+  void writeAndRelocateSubsection(ArrayRef<uint8_t> sec,
+                                  ArrayRef<uint8_t> subsec,
+                                  uint32_t &nextRelocIndex, uint8_t *buf) const;
+
   uint32_t getOutputCharacteristics() const {
     return header->Characteristics & (permMask | typeMask);
   }
@@ -212,6 +221,7 @@ class SectionChunk final : public Chunk {
   }
   void getBaserels(std::vector<Baserel> *res);
   bool isCOMDAT() const;
+  void applyRelocation(uint8_t *off, const coff_relocation &rel) const;
   void applyRelX64(uint8_t *off, uint16_t type, OutputSection *os, uint64_t s,
                    uint64_t p) const;
   void applyRelX86(uint8_t *off, uint16_t type, OutputSection *os, uint64_t s,

diff  --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp
index 36526de7796c..fe362ccaf0dc 100644
--- a/lld/COFF/PDB.cpp
+++ b/lld/COFF/PDB.cpp
@@ -62,6 +62,7 @@ using namespace lld;
 using namespace lld::coff;
 
 using llvm::object::coff_section;
+using llvm::pdb::StringTableFixup;
 
 static ExitOnError exitOnErr;
 
@@ -108,6 +109,8 @@ class PDBLinker {
   /// Link info for each import file in the symbol table into the PDB.
   void addImportFilesToPDB(ArrayRef<OutputSection *> outputSections);
 
+  void createModuleDBI(ObjFile *file);
+
   /// Link CodeView from a single object file into the target (output) PDB.
   /// When a precompiled headers object is linked, its TPI map might be provided
   /// externally.
@@ -115,9 +118,30 @@ class PDBLinker {
 
   void addDebugSymbols(TpiSource *source);
 
-  void mergeSymbolRecords(TpiSource *source,
-                          std::vector<ulittle32_t *> &stringTableRefs,
-                          BinaryStreamRef symData);
+  // Analyze the symbol records to separate module symbols from global symbols,
+  // find string references, and calculate how large the symbol stream will be
+  // in the PDB.
+  void analyzeSymbolSubsection(SectionChunk *debugChunk,
+                               uint32_t &moduleSymOffset,
+                               uint32_t &nextRelocIndex,
+                               std::vector<StringTableFixup> &stringTableFixups,
+                               BinaryStreamRef symData);
+
+  // Write all module symbols from all all live debug symbol subsections of the
+  // given object file into the given stream writer.
+  Error writeAllModuleSymbolRecords(ObjFile *file, BinaryStreamWriter &writer);
+
+  // Callback to copy and relocate debug symbols during PDB file writing.
+  static Error commitSymbolsForObject(void *ctx, void *obj,
+                                      BinaryStreamWriter &writer);
+
+  // Copy the symbol record, relocate it, and fix the alignment if necessary.
+  // Rewrite type indices in the record. Replace unrecognized symbol records
+  // with S_SKIP records.
+  void writeSymbolRecord(SectionChunk *debugChunk,
+                         ArrayRef<uint8_t> sectionContents, CVSymbol sym,
+                         size_t alignedSize, uint32_t &nextRelocIndex,
+                         std::vector<uint8_t> &storage);
 
   /// Add the section map and section contributions to the PDB.
   void addSections(ArrayRef<OutputSection *> outputSections,
@@ -150,6 +174,16 @@ class PDBLinker {
   uint64_t nbTypeRecordsBytes = 0;
 };
 
+/// Represents an unrelocated DEBUG_S_FRAMEDATA subsection.
+struct UnrelocatedFpoData {
+  SectionChunk *debugChunk = nullptr;
+  ArrayRef<uint8_t> subsecData;
+  uint32_t relocIndex = 0;
+};
+
+/// The size of the magic bytes at the beginning of a symbol section or stream.
+enum : uint32_t { kSymbolStreamMagicSize = 4 };
+
 class DebugSHandler {
   PDBLinker &linker;
 
@@ -176,23 +210,36 @@ class DebugSHandler {
   /// contain string table references which need to be re-written, so we
   /// collect them all here and re-write them after all subsections have been
   /// discovered and processed.
-  std::vector<DebugFrameDataSubsectionRef> newFpoFrames;
+  std::vector<UnrelocatedFpoData> frameDataSubsecs;
+
+  /// List of string table references in symbol records. Later they will be
+  /// applied to the symbols during PDB writing.
+  std::vector<StringTableFixup> stringTableFixups;
+
+  /// Sum of the size of all module symbol records across all .debug$S sections.
+  /// Includes record realignment and the size of the symbol stream magic
+  /// prefix.
+  uint32_t moduleStreamSize = kSymbolStreamMagicSize;
+
+  /// Next relocation index in the current .debug$S section. Resets every
+  /// handleDebugS call.
+  uint32_t nextRelocIndex = 0;
 
-  /// Pointers to raw memory that we determine have string table references
-  /// that need to be re-written.  We first process all .debug$S subsections
-  /// to ensure that we can handle subsections written in any order, building
-  /// up this list as we go.  At the end, we use the string table (which must
-  /// have been discovered by now else it is an error) to re-write these
-  /// references.
-  std::vector<ulittle32_t *> stringTableReferences;
+  void advanceRelocIndex(SectionChunk *debugChunk, ArrayRef<uint8_t> subsec);
 
-  void mergeInlineeLines(const DebugSubsectionRecord &inlineeLines);
+  void addUnrelocatedSubsection(SectionChunk *debugChunk,
+                                const DebugSubsectionRecord &ss);
+
+  void addFrameDataSubsection(SectionChunk *debugChunk,
+                              const DebugSubsectionRecord &ss);
+
+  void recordStringTableReferences(CVSymbol sym, uint32_t symOffset);
 
 public:
   DebugSHandler(PDBLinker &linker, ObjFile &file, TpiSource *source)
       : linker(linker), file(file), source(source) {}
 
-  void handleDebugS(ArrayRef<uint8_t> relocatedDebugContents);
+  void handleDebugS(SectionChunk *debugChunk);
 
   void finish();
 };
@@ -266,27 +313,19 @@ static void addGHashTypeInfo(pdb::PDBFileBuilder &builder) {
 }
 
 static void
-recordStringTableReferenceAtOffset(MutableArrayRef<uint8_t> contents,
-                                   uint32_t offset,
-                                   std::vector<ulittle32_t *> &strTableRefs) {
-  contents =
-      contents.drop_front(offset).take_front(sizeof(support::ulittle32_t));
-  ulittle32_t *index = reinterpret_cast<ulittle32_t *>(contents.data());
-  strTableRefs.push_back(index);
-}
-
-static void
-recordStringTableReferences(SymbolKind kind, MutableArrayRef<uint8_t> contents,
-                            std::vector<ulittle32_t *> &strTableRefs) {
+recordStringTableReferences(CVSymbol sym, uint32_t symOffset,
+                            std::vector<StringTableFixup> &stringTableFixups) {
   // For now we only handle S_FILESTATIC, but we may need the same logic for
   // S_DEFRANGE and S_DEFRANGE_SUBFIELD.  However, I cannot seem to generate any
   // PDBs that contain these types of records, so because of the uncertainty
   // they are omitted here until we can prove that it's necessary.
-  switch (kind) {
-  case SymbolKind::S_FILESTATIC:
+  switch (sym.kind()) {
+  case SymbolKind::S_FILESTATIC: {
     // FileStaticSym::ModFileOffset
-    recordStringTableReferenceAtOffset(contents, 8, strTableRefs);
+    uint32_t ref = *reinterpret_cast<const ulittle32_t *>(&sym.data()[8]);
+    stringTableFixups.push_back({ref, symOffset + 8});
     break;
+  }
   case SymbolKind::S_DEFRANGE:
   case SymbolKind::S_DEFRANGE_SUBFIELD:
     log("Not fixing up string table reference in S_DEFRANGE / "
@@ -359,60 +398,48 @@ static void translateIdSymbols(MutableArrayRef<uint8_t> &recordData,
   }
 }
 
-/// Copy the symbol record. In a PDB, symbol records must be 4 byte aligned.
-/// The object file may not be aligned.
-static MutableArrayRef<uint8_t>
-copyAndAlignSymbol(const CVSymbol &sym, MutableArrayRef<uint8_t> &alignedMem) {
-  size_t size = alignTo(sym.length(), alignOf(CodeViewContainer::Pdb));
-  assert(size >= 4 && "record too short");
-  assert(size <= MaxRecordLength && "record too long");
-  assert(alignedMem.size() >= size && "didn't preallocate enough");
-
-  // Copy the symbol record and zero out any padding bytes.
-  MutableArrayRef<uint8_t> newData = alignedMem.take_front(size);
-  alignedMem = alignedMem.drop_front(size);
-  memcpy(newData.data(), sym.data().data(), sym.length());
-  memset(newData.data() + sym.length(), 0, size - sym.length());
-
-  // Update the record prefix length. It should point to the beginning of the
-  // next record.
-  auto *prefix = reinterpret_cast<RecordPrefix *>(newData.data());
-  prefix->RecordLen = size - 2;
-  return newData;
-}
-
+namespace {
 struct ScopeRecord {
   ulittle32_t ptrParent;
   ulittle32_t ptrEnd;
 };
+} // namespace
 
-struct SymbolScope {
-  ScopeRecord *openingRecord;
-  uint32_t scopeOffset;
-};
+/// Given a pointer to a symbol record that opens a scope, return a pointer to
+/// the scope fields.
+static ScopeRecord *getSymbolScopeFields(void *sym) {
+  return reinterpret_cast<ScopeRecord *>(reinterpret_cast<char *>(sym) +
+                                         sizeof(RecordPrefix));
+}
 
-static void scopeStackOpen(SmallVectorImpl<SymbolScope> &stack,
-                           uint32_t curOffset, CVSymbol &sym) {
-  assert(symbolOpensScope(sym.kind()));
-  SymbolScope s;
-  s.scopeOffset = curOffset;
-  s.openingRecord = const_cast<ScopeRecord *>(
-      reinterpret_cast<const ScopeRecord *>(sym.content().data()));
-  s.openingRecord->ptrParent = stack.empty() ? 0 : stack.back().scopeOffset;
-  stack.push_back(s);
+// To open a scope, push the offset of the current symbol record onto the
+// stack.
+static void scopeStackOpen(SmallVectorImpl<uint32_t> &stack,
+                           std::vector<uint8_t> &storage) {
+  stack.push_back(storage.size());
 }
 
-static void scopeStackClose(SmallVectorImpl<SymbolScope> &stack,
-                            uint32_t curOffset, InputFile *file) {
+// To close a scope, update the record that opened the scope.
+static void scopeStackClose(SmallVectorImpl<uint32_t> &stack,
+                            std::vector<uint8_t> &storage,
+                            uint32_t storageBaseOffset, ObjFile *file) {
   if (stack.empty()) {
     warn("symbol scopes are not balanced in " + file->getName());
     return;
   }
-  SymbolScope s = stack.pop_back_val();
-  s.openingRecord->ptrEnd = curOffset;
+
+  // Update ptrEnd of the record that opened the scope to point to the
+  // current record, if we are writing into the module symbol stream.
+  uint32_t offOpen = stack.pop_back_val();
+  uint32_t offEnd = storageBaseOffset + storage.size();
+  uint32_t offParent = stack.empty() ? 0 : (stack.back() + storageBaseOffset);
+  ScopeRecord *scopeRec = getSymbolScopeFields(&(storage)[offOpen]);
+  scopeRec->ptrParent = offParent;
+  scopeRec->ptrEnd = offEnd;
 }
 
-static bool symbolGoesInModuleStream(const CVSymbol &sym, bool isGlobalScope) {
+static bool symbolGoesInModuleStream(const CVSymbol &sym,
+                                     unsigned symbolScopeDepth) {
   switch (sym.kind()) {
   case SymbolKind::S_GDATA32:
   case SymbolKind::S_CONSTANT:
@@ -426,7 +453,7 @@ static bool symbolGoesInModuleStream(const CVSymbol &sym, bool isGlobalScope) {
     return false;
   // S_UDT records go in the module stream if it is not a global S_UDT.
   case SymbolKind::S_UDT:
-    return !isGlobalScope;
+    return symbolScopeDepth > 0;
   // S_GDATA32 does not go in the module stream, but S_LDATA32 does.
   case SymbolKind::S_LDATA32:
   case SymbolKind::S_LTHREAD32:
@@ -436,13 +463,15 @@ static bool symbolGoesInModuleStream(const CVSymbol &sym, bool isGlobalScope) {
 }
 
 static bool symbolGoesInGlobalsStream(const CVSymbol &sym,
-                                      bool isFunctionScope) {
+                                      unsigned symbolScopeDepth) {
   switch (sym.kind()) {
   case SymbolKind::S_CONSTANT:
   case SymbolKind::S_GDATA32:
   case SymbolKind::S_GTHREAD32:
   case SymbolKind::S_GPROC32:
   case SymbolKind::S_LPROC32:
+  case SymbolKind::S_GPROC32_ID:
+  case SymbolKind::S_LPROC32_ID:
   // We really should not be seeing S_PROCREF and S_LPROCREF in the first place
   // since they are synthesized by the linker in response to S_GPROC32 and
   // S_LPROC32, but if we do see them, copy them straight through.
@@ -453,14 +482,16 @@ static bool symbolGoesInGlobalsStream(const CVSymbol &sym,
   case SymbolKind::S_UDT:
   case SymbolKind::S_LDATA32:
   case SymbolKind::S_LTHREAD32:
-    return !isFunctionScope;
+    return symbolScopeDepth == 0;
   default:
     return false;
   }
 }
 
 static void addGlobalSymbol(pdb::GSIStreamBuilder &builder, uint16_t modIndex,
-                            unsigned symOffset, const CVSymbol &sym) {
+                            unsigned symOffset,
+                            std::vector<uint8_t> &symStorage) {
+  CVSymbol sym(makeArrayRef(symStorage));
   switch (sym.kind()) {
   case SymbolKind::S_CONSTANT:
   case SymbolKind::S_UDT:
@@ -469,9 +500,14 @@ static void addGlobalSymbol(pdb::GSIStreamBuilder &builder, uint16_t modIndex,
   case SymbolKind::S_LTHREAD32:
   case SymbolKind::S_LDATA32:
   case SymbolKind::S_PROCREF:
-  case SymbolKind::S_LPROCREF:
-    builder.addGlobalSymbol(sym);
+  case SymbolKind::S_LPROCREF: {
+    // sym is a temporary object, so we have to copy and reallocate the record
+    // to stabilize it.
+    uint8_t *mem = bAlloc.Allocate<uint8_t>(sym.length());
+    memcpy(mem, sym.data().data(), sym.length());
+    builder.addGlobalSymbol(CVSymbol(makeArrayRef(mem, sym.length())));
     break;
+  }
   case SymbolKind::S_GPROC32:
   case SymbolKind::S_LPROC32: {
     SymbolRecordKind k = SymbolRecordKind::ProcRefSym;
@@ -492,117 +528,189 @@ static void addGlobalSymbol(pdb::GSIStreamBuilder &builder, uint16_t modIndex,
   }
 }
 
-void PDBLinker::mergeSymbolRecords(TpiSource *source,
-                                   std::vector<ulittle32_t *> &stringTableRefs,
-                                   BinaryStreamRef symData) {
-  ObjFile *file = source->file;
+// Check if the given symbol record was padded for alignment. If so, zero out
+// the padding bytes and update the record prefix with the new size.
+static void fixRecordAlignment(MutableArrayRef<uint8_t> recordBytes,
+                               size_t oldSize) {
+  size_t alignedSize = recordBytes.size();
+  if (oldSize == alignedSize)
+    return;
+  reinterpret_cast<RecordPrefix *>(recordBytes.data())->RecordLen =
+      alignedSize - 2;
+  memset(recordBytes.data() + oldSize, 0, alignedSize - oldSize);
+}
+
+// Replace any record with a skip record of the same size. This is useful when
+// we have reserved size for a symbol record, but type index remapping fails.
+static void replaceWithSkipRecord(MutableArrayRef<uint8_t> recordBytes) {
+  memset(recordBytes.data(), 0, recordBytes.size());
+  auto *prefix = reinterpret_cast<RecordPrefix *>(recordBytes.data());
+  prefix->RecordKind = SymbolKind::S_SKIP;
+  prefix->RecordLen = recordBytes.size() - 2;
+}
+
+// Copy the symbol record, relocate it, and fix the alignment if necessary.
+// Rewrite type indices in the record. Replace unrecognized symbol records with
+// S_SKIP records.
+void PDBLinker::writeSymbolRecord(SectionChunk *debugChunk,
+                                  ArrayRef<uint8_t> sectionContents,
+                                  CVSymbol sym, size_t alignedSize,
+                                  uint32_t &nextRelocIndex,
+                                  std::vector<uint8_t> &storage) {
+  // Allocate space for the new record at the end of the storage.
+  storage.resize(storage.size() + alignedSize);
+  auto recordBytes = MutableArrayRef<uint8_t>(storage).take_back(alignedSize);
+
+  // Copy the symbol record and relocate it.
+  debugChunk->writeAndRelocateSubsection(sectionContents, sym.data(),
+                                         nextRelocIndex, recordBytes.data());
+  fixRecordAlignment(recordBytes, sym.length());
+
+  // Re-map all the type index references.
+  TpiSource *source = debugChunk->file->debugTypesObj;
+  if (!source->remapTypesInSymbolRecord(recordBytes)) {
+    log("ignoring unknown symbol record with kind 0x" + utohexstr(sym.kind()));
+    replaceWithSkipRecord(recordBytes);
+  }
+
+  // An object file may have S_xxx_ID symbols, but these get converted to
+  // "real" symbols in a PDB.
+  translateIdSymbols(recordBytes, tMerger, source);
+}
+
+void PDBLinker::analyzeSymbolSubsection(
+    SectionChunk *debugChunk, uint32_t &moduleSymOffset,
+    uint32_t &nextRelocIndex, std::vector<StringTableFixup> &stringTableFixups,
+    BinaryStreamRef symData) {
+  ObjFile *file = debugChunk->file;
+  uint32_t moduleSymStart = moduleSymOffset;
+
+  uint32_t scopeLevel = 0;
+  std::vector<uint8_t> storage;
+  ArrayRef<uint8_t> sectionContents = debugChunk->getContents();
+
   ArrayRef<uint8_t> symsBuffer;
   cantFail(symData.readBytes(0, symData.getLength(), symsBuffer));
-  SmallVector<SymbolScope, 4> scopes;
 
   if (symsBuffer.empty())
     warn("empty symbols subsection in " + file->getName());
 
-  // Iterate every symbol to check if any need to be realigned, and if so, how
-  // much space we need to allocate for them.
-  bool needsRealignment = false;
-  unsigned totalRealignedSize = 0;
-  auto ec = forEachCodeViewRecord<CVSymbol>(
+  Error ec = forEachCodeViewRecord<CVSymbol>(
       symsBuffer, [&](CVSymbol sym) -> llvm::Error {
-        unsigned realignedSize =
+        // Track the current scope.
+        if (symbolOpensScope(sym.kind()))
+          ++scopeLevel;
+        else if (symbolEndsScope(sym.kind()))
+          --scopeLevel;
+
+        uint32_t alignedSize =
             alignTo(sym.length(), alignOf(CodeViewContainer::Pdb));
-        needsRealignment |= realignedSize != sym.length();
-        totalRealignedSize += realignedSize;
+
+        // Copy global records. Some global records (mainly procedures)
+        // reference the current offset into the module stream.
+        if (symbolGoesInGlobalsStream(sym, scopeLevel)) {
+          storage.clear();
+          writeSymbolRecord(debugChunk, sectionContents, sym, alignedSize,
+                            nextRelocIndex, storage);
+          addGlobalSymbol(builder.getGsiBuilder(),
+                          file->moduleDBI->getModuleIndex(), moduleSymOffset,
+                          storage);
+          ++globalSymbols;
+        }
+
+        // Update the module stream offset and record any string table index
+        // references. There are very few of these and they will be rewritten
+        // later during PDB writing.
+        if (symbolGoesInModuleStream(sym, scopeLevel)) {
+          recordStringTableReferences(sym, moduleSymOffset, stringTableFixups);
+          moduleSymOffset += alignedSize;
+          ++moduleSymbols;
+        }
+
         return Error::success();
       });
 
-  // If any of the symbol record lengths was corrupt, ignore them all, warn
-  // about it, and move on.
+  // If we encountered corrupt records, ignore the whole subsection. If we wrote
+  // any partial records, undo that. For globals, we just keep what we have and
+  // continue.
   if (ec) {
     warn("corrupt symbol records in " + file->getName());
+    moduleSymOffset = moduleSymStart;
     consumeError(std::move(ec));
-    return;
-  }
-
-  // If any symbol needed realignment, allocate enough contiguous memory for
-  // them all. Typically symbol subsections are small enough that this will not
-  // cause fragmentation.
-  MutableArrayRef<uint8_t> alignedSymbolMem;
-  if (needsRealignment) {
-    void *alignedData =
-        bAlloc.Allocate(totalRealignedSize, alignOf(CodeViewContainer::Pdb));
-    alignedSymbolMem = makeMutableArrayRef(
-        reinterpret_cast<uint8_t *>(alignedData), totalRealignedSize);
   }
+}
 
-  // Iterate again, this time doing the real work.
-  unsigned curSymOffset = file->moduleDBI->getNextSymbolOffset();
-  ArrayRef<uint8_t> bulkSymbols;
-  cantFail(forEachCodeViewRecord<CVSymbol>(
-      symsBuffer, [&](CVSymbol sym) -> llvm::Error {
-        // Align the record if required.
-        MutableArrayRef<uint8_t> recordBytes;
-        if (needsRealignment) {
-          recordBytes = copyAndAlignSymbol(sym, alignedSymbolMem);
-          sym = CVSymbol(recordBytes);
-        } else {
-          // Otherwise, we can actually mutate the symbol directly, since we
-          // copied it to apply relocations.
-          recordBytes = makeMutableArrayRef(
-              const_cast<uint8_t *>(sym.data().data()), sym.length());
-        }
+Error PDBLinker::writeAllModuleSymbolRecords(ObjFile *file,
+                                             BinaryStreamWriter &writer) {
+  std::vector<uint8_t> storage;
+  SmallVector<uint32_t, 4> scopes;
 
-        // Re-map all the type index references.
-        if (!source->remapTypesInSymbolRecord(recordBytes)) {
-          log("error remapping types in symbol of kind 0x" +
-              utohexstr(sym.kind()) + ", ignoring");
-          return Error::success();
-        }
+  // Visit all live .debug$S sections a second time, and write them to the PDB.
+  for (SectionChunk *debugChunk : file->getDebugChunks()) {
+    if (!debugChunk->live || debugChunk->getSize() == 0 ||
+        debugChunk->getSectionName() != ".debug$S")
+      continue;
 
-        // An object file may have S_xxx_ID symbols, but these get converted to
-        // "real" symbols in a PDB.
-        translateIdSymbols(recordBytes, tMerger, source);
-        sym = CVSymbol(recordBytes);
+    ArrayRef<uint8_t> sectionContents = debugChunk->getContents();
+    auto contents =
+        SectionChunk::consumeDebugMagic(sectionContents, ".debug$S");
+    DebugSubsectionArray subsections;
+    BinaryStreamReader reader(contents, support::little);
+    exitOnErr(reader.readArray(subsections, contents.size()));
 
-        // If this record refers to an offset in the object file's string table,
-        // add that item to the global PDB string table and re-write the index.
-        recordStringTableReferences(sym.kind(), recordBytes, stringTableRefs);
+    uint32_t nextRelocIndex = 0;
+    for (const DebugSubsectionRecord &ss : subsections) {
+      if (ss.kind() != DebugSubsectionKind::Symbols)
+        continue;
 
-        // Fill in "Parent" and "End" fields by maintaining a stack of scopes.
-        if (symbolOpensScope(sym.kind()))
-          scopeStackOpen(scopes, curSymOffset, sym);
-        else if (symbolEndsScope(sym.kind()))
-          scopeStackClose(scopes, curSymOffset, file);
+      uint32_t moduleSymStart = writer.getOffset();
+      scopes.clear();
+      storage.clear();
+      ArrayRef<uint8_t> symsBuffer;
+      BinaryStreamRef sr = ss.getRecordData();
+      cantFail(sr.readBytes(0, sr.getLength(), symsBuffer));
+      auto ec = forEachCodeViewRecord<CVSymbol>(
+          symsBuffer, [&](CVSymbol sym) -> llvm::Error {
+            // Track the current scope. Only update records in the postmerge
+            // pass.
+            if (symbolOpensScope(sym.kind()))
+              scopeStackOpen(scopes, storage);
+            else if (symbolEndsScope(sym.kind()))
+              scopeStackClose(scopes, storage, moduleSymStart, file);
+
+            // Copy, relocate, and rewrite each module symbol.
+            if (symbolGoesInModuleStream(sym, scopes.size())) {
+              uint32_t alignedSize =
+                  alignTo(sym.length(), alignOf(CodeViewContainer::Pdb));
+              writeSymbolRecord(debugChunk, sectionContents, sym, alignedSize,
+                                nextRelocIndex, storage);
+            }
+            return Error::success();
+          });
+
+      // If we encounter corrupt records in the second pass, ignore them. We
+      // already warned about them in the first analysis pass.
+      if (ec) {
+        consumeError(std::move(ec));
+        storage.clear();
+      }
 
-        // Add the symbol to the globals stream if necessary.  Do this before
-        // adding the symbol to the module since we may need to get the next
-        // symbol offset, and writing to the module's symbol stream will update
-        // that offset.
-        if (symbolGoesInGlobalsStream(sym, !scopes.empty())) {
-          addGlobalSymbol(builder.getGsiBuilder(),
-                          file->moduleDBI->getModuleIndex(), curSymOffset, sym);
-          ++globalSymbols;
-        }
+      // Writing bytes has a very high overhead, so write the entire subsection
+      // at once.
+      // TODO: Consider buffering symbols for the entire object file to reduce
+      // overhead even further.
+      if (Error e = writer.writeBytes(storage))
+        return e;
+    }
+  }
 
-        if (symbolGoesInModuleStream(sym, scopes.empty())) {
-          // Add symbols to the module in bulk. If this symbol is contiguous
-          // with the previous run of symbols to add, combine the ranges. If
-          // not, close the previous range of symbols and start a new one.
-          if (sym.data().data() == bulkSymbols.end()) {
-            bulkSymbols = makeArrayRef(bulkSymbols.data(),
-                                       bulkSymbols.size() + sym.length());
-          } else {
-            file->moduleDBI->addSymbolsInBulk(bulkSymbols);
-            bulkSymbols = recordBytes;
-          }
-          curSymOffset += sym.length();
-          ++moduleSymbols;
-        }
-        return Error::success();
-      }));
+  return Error::success();
+}
 
-  // Add any remaining symbols we've accumulated.
-  file->moduleDBI->addSymbolsInBulk(bulkSymbols);
+Error PDBLinker::commitSymbolsForObject(void *ctx, void *obj,
+                                        BinaryStreamWriter &writer) {
+  return static_cast<PDBLinker *>(ctx)->writeAllModuleSymbolRecords(
+      static_cast<ObjFile *>(obj), writer);
 }
 
 static pdb::SectionContrib createSectionContrib(const Chunk *c, uint32_t modi) {
@@ -642,13 +750,18 @@ translateStringTableIndex(uint32_t objIndex,
   return pdbStrTable.insert(*expectedString);
 }
 
-void DebugSHandler::handleDebugS(ArrayRef<uint8_t> relocatedDebugContents) {
-  relocatedDebugContents =
-      SectionChunk::consumeDebugMagic(relocatedDebugContents, ".debug$S");
-
+void DebugSHandler::handleDebugS(SectionChunk *debugChunk) {
+  // Note that we are processing the *unrelocated* section contents. They will
+  // be relocated later during PDB writing.
+  ArrayRef<uint8_t> contents = debugChunk->getContents();
+  contents = SectionChunk::consumeDebugMagic(contents, ".debug$S");
   DebugSubsectionArray subsections;
-  BinaryStreamReader reader(relocatedDebugContents, support::little);
-  exitOnErr(reader.readArray(subsections, relocatedDebugContents.size()));
+  BinaryStreamReader reader(contents, support::little);
+  exitOnErr(reader.readArray(subsections, contents.size()));
+  debugChunk->sortRelocations();
+
+  // Reset the relocation index, since this is a new section.
+  nextRelocIndex = 0;
 
   for (const DebugSubsectionRecord &ss : subsections) {
     // Ignore subsections with the 'ignore' bit. Some versions of the Visual C++
@@ -669,30 +782,17 @@ void DebugSHandler::handleDebugS(ArrayRef<uint8_t> relocatedDebugContents) {
       exitOnErr(checksums.initialize(ss.getRecordData()));
       break;
     case DebugSubsectionKind::Lines:
-      // We can add the relocated line table directly to the PDB without
-      // modification because the file checksum offsets will stay the same.
-      file.moduleDBI->addDebugSubsection(ss);
-      break;
     case DebugSubsectionKind::InlineeLines:
-      // The inlinee lines subsection also has file checksum table references
-      // that can be used directly, but it contains function id references that
-      // must be remapped.
-      mergeInlineeLines(ss);
+      addUnrelocatedSubsection(debugChunk, ss);
       break;
-    case DebugSubsectionKind::FrameData: {
-      // We need to re-write string table indices here, so save off all
-      // frame data subsections until we've processed the entire list of
-      // subsections so that we can be sure we have the string table.
-      DebugFrameDataSubsectionRef fds;
-      exitOnErr(fds.initialize(ss.getRecordData()));
-      newFpoFrames.push_back(std::move(fds));
+    case DebugSubsectionKind::FrameData:
+      addFrameDataSubsection(debugChunk, ss);
       break;
-    }
-    case DebugSubsectionKind::Symbols: {
-      linker.mergeSymbolRecords(source, stringTableReferences,
-                                ss.getRecordData());
+    case DebugSubsectionKind::Symbols:
+      linker.analyzeSymbolSubsection(debugChunk, moduleStreamSize,
+                                     nextRelocIndex, stringTableFixups,
+                                     ss.getRecordData());
       break;
-    }
 
     case DebugSubsectionKind::CrossScopeImports:
     case DebugSubsectionKind::CrossScopeExports:
@@ -719,6 +819,85 @@ void DebugSHandler::handleDebugS(ArrayRef<uint8_t> relocatedDebugContents) {
   }
 }
 
+void DebugSHandler::advanceRelocIndex(SectionChunk *sc,
+                                      ArrayRef<uint8_t> subsec) {
+  ptr
diff _t vaBegin = subsec.data() - sc->getContents().data();
+  assert(vaBegin > 0);
+  auto relocs = sc->getRelocs();
+  for (; nextRelocIndex < relocs.size(); ++nextRelocIndex) {
+    if (relocs[nextRelocIndex].VirtualAddress >= vaBegin)
+      break;
+  }
+}
+
+namespace {
+/// Wrapper class for unrelocated line and inlinee line subsections, which
+/// require only relocation and type index remapping to add to the PDB.
+class UnrelocatedDebugSubsection : public DebugSubsection {
+public:
+  UnrelocatedDebugSubsection(DebugSubsectionKind k, SectionChunk *debugChunk,
+                             ArrayRef<uint8_t> subsec, uint32_t relocIndex)
+      : DebugSubsection(k), debugChunk(debugChunk), subsec(subsec),
+        relocIndex(relocIndex) {}
+
+  Error commit(BinaryStreamWriter &writer) const override;
+  uint32_t calculateSerializedSize() const override { return subsec.size(); }
+
+  SectionChunk *debugChunk;
+  ArrayRef<uint8_t> subsec;
+  uint32_t relocIndex;
+};
+} // namespace
+
+Error UnrelocatedDebugSubsection::commit(BinaryStreamWriter &writer) const {
+  std::vector<uint8_t> relocatedBytes(subsec.size());
+  uint32_t tmpRelocIndex = relocIndex;
+  debugChunk->writeAndRelocateSubsection(debugChunk->getContents(), subsec,
+                                         tmpRelocIndex, relocatedBytes.data());
+
+  // Remap type indices in inlinee line records in place. Skip the remapping if
+  // there is no type source info.
+  if (kind() == DebugSubsectionKind::InlineeLines &&
+      debugChunk->file->debugTypesObj) {
+    TpiSource *source = debugChunk->file->debugTypesObj;
+    DebugInlineeLinesSubsectionRef inlineeLines;
+    BinaryStreamReader storageReader(relocatedBytes, support::little);
+    exitOnErr(inlineeLines.initialize(storageReader));
+    for (const InlineeSourceLine &line : inlineeLines) {
+      TypeIndex &inlinee = *const_cast<TypeIndex *>(&line.Header->Inlinee);
+      if (!source->remapTypeIndex(inlinee, TiRefKind::IndexRef)) {
+        log("bad inlinee line record in " + debugChunk->file->getName() +
+            " with bad inlinee index 0x" + utohexstr(inlinee.getIndex()));
+      }
+    }
+  }
+
+  return writer.writeBytes(relocatedBytes);
+}
+
+void DebugSHandler::addUnrelocatedSubsection(SectionChunk *debugChunk,
+                                             const DebugSubsectionRecord &ss) {
+  ArrayRef<uint8_t> subsec;
+  BinaryStreamRef sr = ss.getRecordData();
+  cantFail(sr.readBytes(0, sr.getLength(), subsec));
+  advanceRelocIndex(debugChunk, subsec);
+  file.moduleDBI->addDebugSubsection(
+      std::make_shared<UnrelocatedDebugSubsection>(ss.kind(), debugChunk,
+                                                   subsec, nextRelocIndex));
+}
+
+void DebugSHandler::addFrameDataSubsection(SectionChunk *debugChunk,
+                                           const DebugSubsectionRecord &ss) {
+  // We need to re-write string table indices here, so save off all
+  // frame data subsections until we've processed the entire list of
+  // subsections so that we can be sure we have the string table.
+  ArrayRef<uint8_t> subsec;
+  BinaryStreamRef sr = ss.getRecordData();
+  cantFail(sr.readBytes(0, sr.getLength(), subsec));
+  advanceRelocIndex(debugChunk, subsec);
+  frameDataSubsecs.push_back({debugChunk, subsec, nextRelocIndex});
+}
+
 static Expected<StringRef>
 getFileName(const DebugStringTableSubsectionRef &strings,
             const DebugChecksumsSubsectionRef &checksums, uint32_t fileID) {
@@ -729,31 +908,14 @@ getFileName(const DebugStringTableSubsectionRef &strings,
   return strings.getString(offset);
 }
 
-void DebugSHandler::mergeInlineeLines(
-    const DebugSubsectionRecord &inlineeSubsection) {
-  DebugInlineeLinesSubsectionRef inlineeLines;
-  exitOnErr(inlineeLines.initialize(inlineeSubsection.getRecordData()));
-  if (!source) {
-    warn("ignoring inlinee lines section in file that lacks type information");
-    return;
-  }
-
-  // Remap type indices in inlinee line records in place.
-  for (const InlineeSourceLine &line : inlineeLines) {
-    TypeIndex &inlinee = *const_cast<TypeIndex *>(&line.Header->Inlinee);
-    if (!source->remapTypeIndex(inlinee, TiRefKind::IndexRef)) {
-      log("bad inlinee line record in " + file.getName() +
-          " with bad inlinee index 0x" + utohexstr(inlinee.getIndex()));
-    }
-  }
-
-  // Add the modified inlinee line subsection directly.
-  file.moduleDBI->addDebugSubsection(inlineeSubsection);
-}
-
 void DebugSHandler::finish() {
   pdb::DbiStreamBuilder &dbiBuilder = linker.builder.getDbiBuilder();
 
+  // If we found any symbol records for the module symbol stream, defer them.
+  if (moduleStreamSize > kSymbolStreamMagicSize)
+    file.moduleDBI->addUnmergedSymbols(&file, moduleStreamSize -
+                                                  kSymbolStreamMagicSize);
+
   // We should have seen all debug subsections across the entire object file now
   // which means that if a StringTable subsection and Checksums subsection were
   // present, now is the time to handle them.
@@ -762,26 +924,50 @@ void DebugSHandler::finish() {
       fatal(".debug$S sections with a checksums subsection must also contain a "
             "string table subsection");
 
-    if (!stringTableReferences.empty())
+    if (!stringTableFixups.empty())
       warn("No StringTable subsection was encountered, but there are string "
            "table references");
     return;
   }
 
-  // Rewrite string table indices in the Fpo Data and symbol records to refer to
-  // the global PDB string table instead of the object file string table.
-  for (DebugFrameDataSubsectionRef &fds : newFpoFrames) {
-    const ulittle32_t *reloc = fds.getRelocPtr();
+  // Handle FPO data. Each subsection begins with a single image base
+  // relocation, which is then added to the RvaStart of each frame data record
+  // when it is added to the PDB. The string table indices for the FPO program
+  // must also be rewritten to use the PDB string table.
+  for (const UnrelocatedFpoData &subsec : frameDataSubsecs) {
+    // Relocate the first four bytes of the subection and reinterpret them as a
+    // 32 bit integer.
+    SectionChunk *debugChunk = subsec.debugChunk;
+    ArrayRef<uint8_t> subsecData = subsec.subsecData;
+    uint32_t relocIndex = subsec.relocIndex;
+    auto unrelocatedRvaStart = subsecData.take_front(sizeof(uint32_t));
+    uint8_t relocatedRvaStart[sizeof(uint32_t)];
+    debugChunk->writeAndRelocateSubsection(debugChunk->getContents(),
+                                           unrelocatedRvaStart, relocIndex,
+                                           &relocatedRvaStart[0]);
+    uint32_t rvaStart;
+    memcpy(&rvaStart, &relocatedRvaStart[0], sizeof(uint32_t));
+
+    // Copy each frame data record, add in rvaStart, translate string table
+    // indices, and add the record to the PDB.
+    DebugFrameDataSubsectionRef fds;
+    BinaryStreamReader reader(subsecData, support::little);
+    exitOnErr(fds.initialize(reader));
     for (codeview::FrameData fd : fds) {
-      fd.RvaStart += *reloc;
+      fd.RvaStart += rvaStart;
       fd.FrameFunc =
           translateStringTableIndex(fd.FrameFunc, cvStrTab, linker.pdbStrTab);
       dbiBuilder.addNewFpoData(fd);
     }
   }
 
-  for (ulittle32_t *ref : stringTableReferences)
-    *ref = translateStringTableIndex(*ref, cvStrTab, linker.pdbStrTab);
+  // Translate the fixups and pass them off to the module builder so they will
+  // be applied during writing.
+  for (StringTableFixup &ref : stringTableFixups) {
+    ref.StrTabOffset =
+        translateStringTableIndex(ref.StrTabOffset, cvStrTab, linker.pdbStrTab);
+  }
+  file.moduleDBI->setStringTableFixups(std::move(stringTableFixups));
 
   // Make a new file checksum table that refers to offsets in the PDB-wide
   // string table. Generally the string table subsection appears after the
@@ -844,11 +1030,12 @@ void PDBLinker::addDebugSymbols(TpiSource *source) {
     if (!isDebugS && !isDebugF)
       continue;
 
-    ArrayRef<uint8_t> relocatedDebugContents = relocateDebugChunk(*debugChunk);
-
     if (isDebugS) {
-      dsh.handleDebugS(relocatedDebugContents);
+      dsh.handleDebugS(debugChunk);
     } else if (isDebugF) {
+      // Handle old FPO data .debug$F sections. These are relatively rare.
+      ArrayRef<uint8_t> relocatedDebugContents =
+          relocateDebugChunk(*debugChunk);
       FixedStreamArray<object::FpoData> fpoRecords;
       BinaryStreamReader reader(relocatedDebugContents, support::little);
       uint32_t count = relocatedDebugContents.size() / sizeof(object::FpoData);
@@ -869,7 +1056,7 @@ void PDBLinker::addDebugSymbols(TpiSource *source) {
 // path to the object into the PDB. If this is a plain object, we make its
 // path absolute. If it's an object in an archive, we make the archive path
 // absolute.
-static void createModuleDBI(pdb::PDBFileBuilder &builder, ObjFile *file) {
+void PDBLinker::createModuleDBI(ObjFile *file) {
   pdb::DbiStreamBuilder &dbiBuilder = builder.getDbiBuilder();
   SmallString<128> objName;
 
@@ -880,6 +1067,7 @@ static void createModuleDBI(pdb::PDBFileBuilder &builder, ObjFile *file) {
 
   file->moduleDBI = &exitOnErr(dbiBuilder.addModuleInfo(modName));
   file->moduleDBI->setObjFileName(objName);
+  file->moduleDBI->setMergeSymbolsCallback(this, &commitSymbolsForObject);
 
   ArrayRef<Chunk *> chunks = file->getChunks();
   uint32_t modi = file->moduleDBI->getModuleIndex();
@@ -946,8 +1134,7 @@ void PDBLinker::addObjectsToPDB() {
   ScopedTimer t1(addObjectsTimer);
 
   // Create module descriptors
-  for_each(ObjFile::instances,
-           [&](ObjFile *obj) { createModuleDBI(builder, obj); });
+  for_each(ObjFile::instances, [&](ObjFile *obj) { createModuleDBI(obj); });
 
   // Reorder dependency type sources to come first.
   TpiSource::sortDependencies();
@@ -1330,16 +1517,18 @@ void PDBLinker::addImportFilesToPDB(ArrayRef<OutputSection *> outputSections) {
     mod->addSymbol(codeview::SymbolSerializer::writeOneSymbol(
         cs, bAlloc, CodeViewContainer::Pdb));
 
-    SmallVector<SymbolScope, 4> scopes;
     CVSymbol newSym = codeview::SymbolSerializer::writeOneSymbol(
         ts, bAlloc, CodeViewContainer::Pdb);
-    scopeStackOpen(scopes, mod->getNextSymbolOffset(), newSym);
+
+    // Write ptrEnd for the S_THUNK32.
+    ScopeRecord *thunkSymScope =
+        getSymbolScopeFields(const_cast<uint8_t *>(newSym.data().data()));
 
     mod->addSymbol(newSym);
 
     newSym = codeview::SymbolSerializer::writeOneSymbol(es, bAlloc,
                                                         CodeViewContainer::Pdb);
-    scopeStackClose(scopes, mod->getNextSymbolOffset(), file);
+    thunkSymScope->ptrEnd = mod->getNextSymbolOffset();
 
     mod->addSymbol(newSym);
 

diff  --git a/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h b/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h
index beaaef0c5a6c..82b63d729454 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h
@@ -34,6 +34,34 @@ struct MSFLayout;
 }
 namespace pdb {
 
+// Represents merged or unmerged symbols. Merged symbols can be written to the
+// output file as is, but unmerged symbols must be rewritten first. In either
+// case, the size must be known up front.
+struct SymbolListWrapper {
+  explicit SymbolListWrapper(ArrayRef<uint8_t> Syms)
+      : SymPtr(const_cast<uint8_t *>(Syms.data())), SymSize(Syms.size()),
+        NeedsToBeMerged(false) {}
+  explicit SymbolListWrapper(void *SymSrc, uint32_t Length)
+      : SymPtr(SymSrc), SymSize(Length), NeedsToBeMerged(true) {}
+
+  ArrayRef<uint8_t> asArray() const {
+    return ArrayRef<uint8_t>(static_cast<const uint8_t *>(SymPtr), SymSize);
+  }
+
+  uint32_t size() const { return SymSize; }
+
+  void *SymPtr = nullptr;
+  uint32_t SymSize = 0;
+  bool NeedsToBeMerged = false;
+};
+
+/// Represents a string table reference at some offset in the module symbol
+/// stream.
+struct StringTableFixup {
+  uint32_t StrTabOffset = 0;
+  uint32_t SymOffsetOfReference = 0;
+};
+
 class DbiModuleDescriptorBuilder {
   friend class DbiStreamBuilder;
 
@@ -48,10 +76,28 @@ class DbiModuleDescriptorBuilder {
 
   void setPdbFilePathNI(uint32_t NI);
   void setObjFileName(StringRef Name);
+
+  // Callback to merge one source of unmerged symbols.
+  using MergeSymbolsCallback = Error (*)(void *Ctx, void *Symbols,
+                                         BinaryStreamWriter &Writer);
+
+  void setMergeSymbolsCallback(void *Ctx, MergeSymbolsCallback Callback) {
+    MergeSymsCtx = Ctx;
+    MergeSymsCallback = Callback;
+  }
+
+  void setStringTableFixups(std::vector<StringTableFixup> &&Fixups) {
+    StringTableFixups = std::move(Fixups);
+  }
+
   void setFirstSectionContrib(const SectionContrib &SC);
   void addSymbol(codeview::CVSymbol Symbol);
   void addSymbolsInBulk(ArrayRef<uint8_t> BulkSymbols);
 
+  // Add symbols of known size which will be merged (rewritten) when committing
+  // the PDB to disk.
+  void addUnmergedSymbols(void *SymSrc, uint32_t SymLength);
+
   void
   addDebugSubsection(std::shared_ptr<codeview::DebugSubsection> Subsection);
 
@@ -77,8 +123,14 @@ class DbiModuleDescriptorBuilder {
   void finalize();
   Error finalizeMsfLayout();
 
-  Error commit(BinaryStreamWriter &ModiWriter, const msf::MSFLayout &MsfLayout,
-               WritableBinaryStreamRef MsfBuffer);
+  /// Commit the DBI descriptor to the DBI stream.
+  Error commit(BinaryStreamWriter &ModiWriter);
+
+  /// Commit the accumulated symbols to the module symbol stream. Safe to call
+  /// in parallel on 
diff erent DbiModuleDescriptorBuilder objects. Only modifies
+  /// the pre-allocated stream in question.
+  Error commitSymbolStream(const msf::MSFLayout &MsfLayout,
+                           WritableBinaryStreamRef MsfBuffer);
 
 private:
   uint32_t calculateC13DebugInfoSize() const;
@@ -91,7 +143,12 @@ class DbiModuleDescriptorBuilder {
   std::string ModuleName;
   std::string ObjFileName;
   std::vector<std::string> SourceFiles;
-  std::vector<ArrayRef<uint8_t>> Symbols;
+  std::vector<SymbolListWrapper> Symbols;
+
+  void *MergeSymsCtx = nullptr;
+  MergeSymbolsCallback MergeSymsCallback = nullptr;
+
+  std::vector<StringTableFixup> StringTableFixups;
 
   std::vector<codeview::DebugSubsectionRecordBuilder> C13Builders;
 

diff  --git a/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
index 73801ea1dd1b..b6f11a942a26 100644
--- a/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
@@ -74,7 +74,7 @@ void DbiModuleDescriptorBuilder::addSymbolsInBulk(
   if (BulkSymbols.empty())
     return;
 
-  Symbols.push_back(BulkSymbols);
+  Symbols.push_back(SymbolListWrapper(BulkSymbols));
   // Symbols written to a PDB file are required to be 4 byte aligned. The same
   // is not true of object files.
   assert(BulkSymbols.size() % alignOf(CodeViewContainer::Pdb) == 0 &&
@@ -82,6 +82,18 @@ void DbiModuleDescriptorBuilder::addSymbolsInBulk(
   SymbolByteSize += BulkSymbols.size();
 }
 
+void DbiModuleDescriptorBuilder::addUnmergedSymbols(void *SymSrc,
+                                                    uint32_t SymLength) {
+  assert(SymLength > 0);
+  Symbols.push_back(SymbolListWrapper(SymSrc, SymLength));
+
+  // Symbols written to a PDB file are required to be 4 byte aligned. The same
+  // is not true of object files.
+  assert(SymLength % alignOf(CodeViewContainer::Pdb) == 0 &&
+         "Invalid Symbol alignment!");
+  SymbolByteSize += SymLength;
+}
+
 void DbiModuleDescriptorBuilder::addSourceFile(StringRef Path) {
   SourceFiles.push_back(std::string(Path));
 }
@@ -131,9 +143,7 @@ Error DbiModuleDescriptorBuilder::finalizeMsfLayout() {
   return Error::success();
 }
 
-Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter,
-                                         const msf::MSFLayout &MsfLayout,
-                                         WritableBinaryStreamRef MsfBuffer) {
+Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter) {
   // We write the Modi record to the `ModiWriter`, but we additionally write its
   // symbol stream to a brand new stream.
   if (auto EC = ModiWriter.writeObject(Layout))
@@ -144,34 +154,55 @@ Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter,
     return EC;
   if (auto EC = ModiWriter.padToAlignment(sizeof(uint32_t)))
     return EC;
+  return Error::success();
+}
 
-  if (Layout.ModDiStream != kInvalidStreamIndex) {
-    auto NS = WritableMappedBlockStream::createIndexedStream(
-        MsfLayout, MsfBuffer, Layout.ModDiStream, MSF.getAllocator());
-    WritableBinaryStreamRef Ref(*NS);
-    BinaryStreamWriter SymbolWriter(Ref);
-    // Write the symbols.
-    if (auto EC =
-            SymbolWriter.writeInteger<uint32_t>(COFF::DEBUG_SECTION_MAGIC))
-      return EC;
-    for (ArrayRef<uint8_t> Syms : Symbols) {
-      if (auto EC = SymbolWriter.writeBytes(Syms))
+Error DbiModuleDescriptorBuilder::commitSymbolStream(
+    const msf::MSFLayout &MsfLayout, WritableBinaryStreamRef MsfBuffer) {
+  if (Layout.ModDiStream == kInvalidStreamIndex)
+    return Error::success();
+
+  auto NS = WritableMappedBlockStream::createIndexedStream(
+      MsfLayout, MsfBuffer, Layout.ModDiStream, MSF.getAllocator());
+  WritableBinaryStreamRef Ref(*NS);
+  BinaryStreamWriter SymbolWriter(Ref);
+  // Write the symbols.
+  if (auto EC = SymbolWriter.writeInteger<uint32_t>(COFF::DEBUG_SECTION_MAGIC))
+    return EC;
+  for (const SymbolListWrapper &Sym : Symbols) {
+    if (Sym.NeedsToBeMerged) {
+      assert(MergeSymsCallback);
+      if (auto EC = MergeSymsCallback(MergeSymsCtx, Sym.SymPtr, SymbolWriter))
         return EC;
-    }
-    assert(SymbolWriter.getOffset() % alignOf(CodeViewContainer::Pdb) == 0 &&
-           "Invalid debug section alignment!");
-    // TODO: Write C11 Line data
-    for (const auto &Builder : C13Builders) {
-      if (auto EC = Builder.commit(SymbolWriter, CodeViewContainer::Pdb))
+    } else {
+      if (auto EC = SymbolWriter.writeBytes(Sym.asArray()))
         return EC;
     }
+  }
+
+  // Apply the string table fixups.
+  auto SavedOffset = SymbolWriter.getOffset();
+  for (const StringTableFixup &Fixup : StringTableFixups) {
+    SymbolWriter.setOffset(Fixup.SymOffsetOfReference);
+    if (auto E = SymbolWriter.writeInteger<uint32_t>(Fixup.StrTabOffset))
+      return E;
+  }
+  SymbolWriter.setOffset(SavedOffset);
 
-    // TODO: Figure out what GlobalRefs substream actually is and populate it.
-    if (auto EC = SymbolWriter.writeInteger<uint32_t>(0))
+  assert(SymbolWriter.getOffset() % alignOf(CodeViewContainer::Pdb) == 0 &&
+         "Invalid debug section alignment!");
+  // TODO: Write C11 Line data
+  for (const auto &Builder : C13Builders) {
+    if (auto EC = Builder.commit(SymbolWriter, CodeViewContainer::Pdb))
       return EC;
-    if (SymbolWriter.bytesRemaining() > 0)
-      return make_error<RawError>(raw_error_code::stream_too_long);
   }
+
+  // TODO: Figure out what GlobalRefs substream actually is and populate it.
+  if (auto EC = SymbolWriter.writeInteger<uint32_t>(0))
+    return EC;
+  if (SymbolWriter.bytesRemaining() > 0)
+    return make_error<RawError>(raw_error_code::stream_too_long);
+
   return Error::success();
 }
 

diff  --git a/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
index 627aef7506fd..98a8acaffd60 100644
--- a/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
@@ -18,6 +18,7 @@
 #include "llvm/DebugInfo/PDB/Native/RawError.h"
 #include "llvm/Object/COFF.h"
 #include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Parallel.h"
 
 using namespace llvm;
 using namespace llvm::codeview;
@@ -394,10 +395,17 @@ Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout,
     return EC;
 
   for (auto &M : ModiList) {
-    if (auto EC = M->commit(Writer, Layout, MsfBuffer))
+    if (auto EC = M->commit(Writer))
       return EC;
   }
 
+  // Commit symbol streams. This is a lot of data, so do it in parallel.
+  if (auto EC = parallelForEachError(
+          ModiList, [&](std::unique_ptr<DbiModuleDescriptorBuilder> &M) {
+            return M->commitSymbolStream(Layout, MsfBuffer);
+          }))
+    return EC;
+
   if (!SectionContribs.empty()) {
     if (auto EC = Writer.writeEnum(DbiSecContribVer60))
       return EC;


        


More information about the llvm-commits mailing list