[lld] [lld][ELF] Implement merged .debug_names section. (PR #86508)
Fangrui Song via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 8 00:37:24 PDT 2024
================
@@ -2690,6 +2693,756 @@ static uint32_t computeGdbHash(StringRef s) {
return h;
}
+template <class ELFT>
+DebugNamesSection<ELFT>::DebugNamesSection()
+ : SyntheticSection(0, SHT_PROGBITS, 1, ".debug_names") {}
+
+template <class ELFT>
+template <class RelTy>
+void DebugNamesSection<ELFT>::getNameRelocsImpl(
+ InputSection *sec, ArrayRef<RelTy> rels,
+ DenseMap<uint32_t, uint32_t> &relocs) {
+ for (auto &rel : rels) {
+ Symbol &sym = sec->getFile<ELFT>()->getRelocTargetSym(rel);
+ relocs[rel.r_offset] = sym.getVA(getAddend<ELFT>(rel));
+ }
+}
+
+template <class ELFT>
+void DebugNamesSection<ELFT>::getNameRelocs(
+ InputSectionBase *base, DenseMap<uint32_t, uint32_t> &relocs) {
+ auto *sec = cast<InputSection>(base);
+ const RelsOrRelas<ELFT> rels = sec->template relsOrRelas<ELFT>();
+ if (rels.areRelocsRel())
+ getNameRelocsImpl(sec, rels.rels, relocs);
+ else
+ getNameRelocsImpl(sec, rels.relas, relocs);
+}
+
+template <class ELFT> void DebugNamesSection<ELFT>::writeTo(uint8_t *buf) {
+ SmallVector<uint32_t, 0> mergedCuOffsets;
+ SmallVector<uint32_t, 0> mergedTuOffsets;
+ DenseMap<uint32_t, uint32_t> strOffsets;
+ SmallVector<DenseMap<uint32_t, uint32_t>, 0> chunksRelocs;
+ chunksRelocs.reserve(numChunks);
+
+ for (size_t i = 0, e = numChunks; i != e; ++i) {
+ DebugNamesOutputChunk &chunk = outputChunks[i];
+ InputSectionBase *base = inputDebugNamesSections[i];
+ DenseMap<uint32_t, uint32_t> &relocs = chunksRelocs.emplace_back();
+ getNameRelocs(base, relocs);
+
+ // Update CuOffsets list with new data
+ for (uint32_t cuOffset : chunk.compilationUnits)
+ mergedCuOffsets.push_back(chunk.sec->outSecOff + cuOffset);
+
+ // TODO: Update TuOffsets list with new data
+ }
+
+ // Update the entries with the relocated string offsets.
+ for (NamedEntry &stringEntry : mergedEntries) {
+ uint32_t oldOffset = stringEntry.stringOffsetOffset;
+ uint32_t idx = stringEntry.chunkIdx;
+ stringEntry.relocatedEntryOffset = chunksRelocs[idx][oldOffset];
+ }
+
+ // Write out bytes for merged section.
+
+ // Write the header.
+ endian::write32<ELFT::Endianness>(buf + 0, mergedHdr.UnitLength);
+ endian::write16<ELFT::Endianness>(buf + 4, mergedHdr.Version);
+ endian::write32<ELFT::Endianness>(buf + 8, mergedHdr.CompUnitCount);
+ endian::write32<ELFT::Endianness>(buf + 12, mergedHdr.LocalTypeUnitCount);
+ endian::write32<ELFT::Endianness>(buf + 16, mergedHdr.ForeignTypeUnitCount);
+ endian::write32<ELFT::Endianness>(buf + 20, mergedHdr.BucketCount);
+ endian::write32<ELFT::Endianness>(buf + 24, mergedHdr.NameCount);
+ endian::write32<ELFT::Endianness>(buf + 28, mergedHdr.AbbrevTableSize);
+ endian::write32<ELFT::Endianness>(buf + 32, mergedHdr.AugmentationStringSize);
+ buf += 36;
+ memcpy(buf, mergedHdr.AugmentationString.c_str(), 8);
+ buf += 8;
+
+ // Write the CU list.
+ for (uint32_t offset : mergedCuOffsets) {
+ endian::write32<ELFT::Endianness>(buf + 0, offset);
+ buf += 4;
+ }
+
+ // Write the local TU list.
+ // TODO: Fix this, once we get everything working without TUs.
+ if (mergedHdr.LocalTypeUnitCount != 0)
+ warn(".debug_names: type units are not handled in merged index");
+
+ // Write the foreign TU list.
+ // TODO: Fix this, once we get everything working without TUs.
+ if (mergedHdr.ForeignTypeUnitCount != 0)
+ warn(".debug_names: type units are not handled in merged index");
+
+ // Write the hash table.
+ // ... Write the buckets
+ uint32_t idx = 1;
+ for (const SmallVector<NamedEntry *, 0> &bucket : bucketList) {
+ if (!bucket.empty())
+ endian::write32<ELFT::Endianness>(buf + 0, idx);
+ idx += bucket.size();
+ buf += 4;
+ }
+
+ // ...Write the hashes
+ for (const auto &bucket : bucketList) {
+ for (const auto &entry : bucket) {
+ uint32_t hashValue = entry->hashValue;
+ endian::write32<ELFT::Endianness>(buf + 0, hashValue);
+ buf += 4;
+ }
+ }
+
+ // Write the string offsets.
+ for (const NamedEntry &entry : mergedEntries) {
+ endian::write32<ELFT::Endianness>(buf + 0, entry.relocatedEntryOffset);
+ buf += 4;
+ }
+
+ // Write the entry offsets.
+ for (const auto &entry : mergedEntries) {
+ endian::write32<ELFT::Endianness>(buf + 0, entry.entryOffset);
+ buf += 4;
+ }
+
+ // Write the abbrev table.
+ for (const Abbrev *abbrev : mergedAbbrevTable) {
+ buf += encodeULEB128(abbrev->code, buf);
+ buf += encodeULEB128(abbrev->tag, buf);
+ for (DWARFDebugNames::AttributeEncoding attr : abbrev->attributes) {
+ buf += encodeULEB128(attr.Index, buf);
+ buf += encodeULEB128(attr.Form, buf);
+ }
+ endian::write16<ELFT::Endianness>(buf + 0, 0); // attribute sentinels.
+ buf += 2;
+ }
+ *buf++ = 0; // abbrev table sentinel
+
+ // Write the entry pool.
+ for (const auto &stringEntry : mergedEntries) {
+ // Write all the entries for the string.
+ for (const std::unique_ptr<IndexEntry> &entry : stringEntry.indexEntries) {
+ buf += encodeULEB128(entry->abbrevCode, buf);
+ for (const auto &value : entry->attrValues) {
+ endian::write32<ELFT::Endianness>(buf + 0, value.attrValue);
+ buf += value.attrSize;
+ }
+ }
+ *buf++ = 0; // Entry sentinel
+ }
+}
+
+template <class ELFT> bool DebugNamesSection<ELFT>::isNeeded() const {
+ return numChunks > 0;
+}
+
+template <class ELFT>
+static void readCompileUnitOffsets(
+ typename DebugNamesSection<ELFT>::DebugNamesSectionData &secData,
+ typename DebugNamesSection<ELFT>::DebugNamesOutputChunk &outputChunk,
+ DWARFDataExtractor &namesExtractor) {
+ uint64_t offset = secData.locs.CUsBase;
+ uint64_t *offsetPtr = &offset;
+ outputChunk.compilationUnits.resize(secData.hdr.CompUnitCount);
+ uint32_t *bufferPtr =
+ namesExtractor.getU32(offsetPtr, outputChunk.compilationUnits.begin(),
+ secData.hdr.CompUnitCount);
+ if (!bufferPtr)
+ errorOrWarn(toString(outputChunk.sec) +
+ Twine(": error while reading CU offsets"));
+}
+
+template <class ELFT>
+static void readEntryOffsets(
+ typename DebugNamesSection<ELFT>::DebugNamesSectionData &secData,
+ DWARFDataExtractor &namesExtractor) {
+ secData.entryOffsets.resize(secData.hdr.NameCount);
+ uint64_t offset = secData.locs.EntryOffsetsBase;
+ uint64_t *offsetPtr = &offset;
+ uint32_t *bufferPtr = namesExtractor.getU32(
+ offsetPtr, secData.entryOffsets.begin(), secData.hdr.NameCount);
+ if (!bufferPtr)
+ errorOrWarn(Twine(": error while reading entry offsets"));
+}
+
+template <class ELFT>
+static void readAttributeValues(
+ SmallVector<typename DebugNamesSection<ELFT>::AttrValueData, 3> &values,
+ typename DebugNamesSection<ELFT>::DebugNamesInputChunk &chunk,
+ uint64_t &offset,
+ typename DebugNamesSection<ELFT>::DebugNamesSectionData &secData,
+ int32_t &parentOffset, DWARFDataExtractor &namesExtractor,
+ const DWARFDebugNames::Abbrev &abbrev) {
+ const LLDDWARFSection &namesSection = *chunk.namesSection;
+ uint64_t *offsetPtr = &offset;
+ typename DebugNamesSection<ELFT>::AttrValueData cuOrTuAttr = {0, 0};
+ for (DWARFDebugNames::AttributeEncoding attr : abbrev.Attributes) {
+ Error err = Error::success();
+ typename DebugNamesSection<ELFT>::AttrValueData newAttr;
+ uint32_t value;
+ if (attr.Index == DW_IDX_parent && attr.Form != DW_FORM_flag_present &&
+ attr.Form != DW_FORM_ref4)
+ errorOrWarn(toString(namesSection.sec) +
+ Twine(": invalid form for DW_IDX_parent"));
+ switch (attr.Form) {
+ case DW_FORM_flag_present: {
+ // Currently only DW_IDX_parent attributes (in .debug_names) can
+ // have this form. This form does not have a real value (nothing is
+ // emitted for it).
+ if (attr.Index != DW_IDX_parent)
+ errorOrWarn(toString(namesSection.sec) +
+ Twine(": invalid form for attribute"));
+ break;
+ }
+ case DW_FORM_data1:
+ case DW_FORM_ref1: {
+ newAttr.attrValue = namesExtractor.getU8(offsetPtr, &err);
+ newAttr.attrSize = 1;
+ break;
+ }
+ case DW_FORM_data2:
+ case DW_FORM_ref2: {
+ value = namesExtractor.getU16(offsetPtr, &err);
+ newAttr.attrValue = value;
+ newAttr.attrSize = 2;
+ break;
+ }
+ case DW_FORM_data4:
+ case DW_FORM_ref4: {
+ value = namesExtractor.getU32(offsetPtr, &err);
+ newAttr.attrValue = value;
+ newAttr.attrSize = 4;
+ if (attr.Index == dwarf::DW_IDX_parent)
+ parentOffset = value + secData.locs.EntriesBase;
+ break;
+ }
+ case DW_FORM_data8:
+ case DW_FORM_ref8:
+ case DW_FORM_ref_sig8: {
+ value = namesExtractor.getU64(offsetPtr, &err);
+ newAttr.attrValue = value;
+ newAttr.attrSize = 8;
+ break;
+ }
+ default: {
+ errorOrWarn(toString(namesSection.sec) +
+ Twine(": unrecognized form encoding ") + Twine(attr.Form) +
+ Twine(" in .debug_names abbrev table"));
+ break;
+ }
+ }
+ if (err)
+ errorOrWarn(toString(namesSection.sec) +
+ Twine(": error while reading attributes: ") +
+ toString(std::move(err)));
+ if (attr.Index == DW_IDX_compile_unit)
+ // Save it to put it at the end of the attributes list.
+ cuOrTuAttr = newAttr;
+ else if (attr.Form != DW_FORM_flag_present)
+ values.push_back(newAttr);
+ }
+
+ // Make sure the CU or TU index attribute is the last one in the list.
+ values.push_back(cuOrTuAttr);
+}
+
+template <class ELFT>
+static void
+readEntry(typename DebugNamesSection<ELFT>::NamedEntry &stringEntry,
+ typename DebugNamesSection<ELFT>::DebugNamesInputChunk &chunk,
+ typename DebugNamesSection<ELFT>::DebugNamesSectionData &secData,
+ DWARFDataExtractor &namesExtractor,
+ const DWARFDebugNames::NameIndex &ni) {
+ std::unique_ptr<LLDDWARFSection> &namesSection = chunk.namesSection;
+ uint64_t offset = stringEntry.entryOffset;
+ // Each entry ends with a zero 'sentinel' value
+ while (offset < namesSection->Data.size() &&
+ namesSection->Data[offset] != 0) {
+ // Read & store all entries (for the same string)
+ auto entry =
+ std::make_unique<typename DebugNamesSection<ELFT>::IndexEntry>();
+ entry->poolOffset = offset;
+ Error err = Error::success();
+ // This call to namesExtractor can't fail; if there were a problem with the
+ // input, it would have been caught in the call to NamesIndex::extract, in
+ // DebugNamesSection::create.
+ uint64_t ulebValue = namesExtractor.getULEB128(&offset, &err);
+ cantFail(std::move(err));
+ entry->abbrevCode = static_cast<uint32_t>(ulebValue);
+ const auto &abbrevs = ni.getAbbrevs();
+ auto abbrevIt = abbrevs.find_as(entry->abbrevCode);
+ if (abbrevIt != abbrevs.end()) {
+ const DWARFDebugNames::Abbrev &abbrev = *abbrevIt;
+ readAttributeValues<ELFT>(entry->attrValues, chunk, offset, secData,
+ entry->parentOffset, namesExtractor, abbrev);
+ stringEntry.indexEntries.push_back(std::move(entry));
+ } else
+ errorOrWarn(toString(chunk.namesSection->sec) +
+ Twine(": invalid abbrev code in entry"));
+ }
+ if (offset >= namesSection->Data.size())
+ errorOrWarn(
+ toString(chunk.namesSection->sec) +
+ Twine(": encountered unexpected end of section while reading entry"));
+}
+
+template <class ELFT>
+static void
+readEntries(typename DebugNamesSection<ELFT>::DebugNamesSectionData &secData,
+ typename DebugNamesSection<ELFT>::DebugNamesInputChunk &chunk,
+ DWARFDataExtractor &namesExtractor, DataExtractor &strExtractor,
+ const DWARFDebugNames::NameIndex &ni) {
+ // Temporary map from entry offsets to address (pointer) of entry with that
+ // offset; used to find parent entries quickly.
+ DenseMap<uint32_t, typename DebugNamesSection<ELFT>::IndexEntry *> offsetMap;
+ // Reserve sizes for this chunk's hashes & namedEntries.
+ chunk.hashValues.reserve(secData.hdr.NameCount);
+ secData.namedEntries.reserve(secData.hdr.NameCount);
+ // Calculate the Entry Offsets, create initial records.
+ for (uint32_t i = 0, e = secData.hdr.NameCount; i != e; ++i) {
+ // Get string value
+ typename DebugNamesSection<ELFT>::NamedEntry stringEntry;
+ stringEntry.entryOffset =
+ secData.locs.EntriesBase + secData.entryOffsets[i];
+ uint64_t strOffsetOffset =
+ secData.locs.StringOffsetsBase + i * secData.dwarfSize;
+ stringEntry.stringOffsetOffset = strOffsetOffset;
+ uint64_t strOffset =
+ namesExtractor.getRelocatedValue(secData.dwarfSize, &strOffsetOffset);
+ StringRef name = strExtractor.getCStrRef(&strOffset);
+ stringEntry.name = name.data();
+ // Calculate hash
+ stringEntry.hashValue = caseFoldingDjbHash(name);
+ chunk.hashValues.push_back(stringEntry.hashValue);
+ // Read String Entry
+ readEntry<ELFT>(stringEntry, chunk, secData, namesExtractor, ni);
+ // Add index entries & offsets to our temporary map
+ for (const auto &entry : stringEntry.indexEntries)
+ offsetMap[entry->poolOffset] = entry.get();
+ secData.namedEntries.push_back(std::move(stringEntry));
+ }
+ // Find/assign pointers to any 'real' parent entries (needed to find correct
+ // parent entry offsets in merged data).
+ for (auto &stringEntry : secData.namedEntries)
+ for (auto &entry : stringEntry.indexEntries)
+ if (entry->parentOffset != -1)
+ entry->parentEntry = offsetMap[entry->parentOffset];
+}
+
+static uint16_t computeDebugNamesHeaderSize() {
+ // Size of the .debug_names section header, in bytes, for DWARF32:
+ uint16_t size = /* unit length */ 4 +
+ /* version */ 2 +
+ /* padding */ 2 +
+ /* CU count */ 4 +
+ /* TU count */ 4 +
+ /* Foreign TU count */ 4 +
+ /* Bucket Count */ 4 +
+ /* Name Count */ 4 +
+ /* Abbrev table size */ 4 +
+ /* Augmentation string size */ 4 +
+ /* augmentation string */ 8;
+ return size;
+}
+
+template <class ELFT>
+static void collectDebugNamesSectionData(
+ typename DebugNamesSection<ELFT>::DebugNamesInputChunk &chunk,
+ typename DebugNamesSection<ELFT>::DebugNamesOutputChunk &outputChunk,
+ DWARFDataExtractor &namesExtractor, DataExtractor &strExtractor) {
+ for (const DWARFDebugNames::NameIndex &ni : *chunk.debugNamesData) {
+ typename DebugNamesSection<ELFT>::DebugNamesSectionData &secData =
+ chunk.sectionsData.emplace_back();
+ secData.hdr = ni.getHeader();
+ if (secData.hdr.Format != DwarfFormat::DWARF32) {
+ errorOrWarn(toString(chunk.namesSection->sec) +
+ Twine(": unsupported DWARF64"));
+ // Don't try to continue; we can't parse DWARF64 at the moment.
+ return;
+ }
+ secData.dwarfSize = dwarf::getDwarfOffsetByteSize(DwarfFormat::DWARF32);
+ secData.hdrSize = computeDebugNamesHeaderSize();
+ if (secData.hdr.Version != 5)
+ errorOrWarn(toString(chunk.namesSection->sec) +
+ Twine(": unsupported version"));
+ findDebugNamesOffsets(secData.locs, secData.hdrSize, secData.hdr.Format,
+ secData.hdr);
+ readCompileUnitOffsets<ELFT>(secData, outputChunk, namesExtractor);
+ readEntryOffsets<ELFT>(secData, namesExtractor);
+ readEntries<ELFT>(secData, chunk, namesExtractor, strExtractor, ni);
+ }
+}
+
+template <class ELFT>
+void DebugNamesSection<ELFT>::collectMergedCounts(
+ MutableArrayRef<DebugNamesInputChunk> &inputChunks) {
+ SmallVector<uint32_t, 0> tmpMergedCuOffsets;
+ mergedHdr.CompUnitCount = 0;
+ mergedHdr.LocalTypeUnitCount = 0;
+ mergedHdr.ForeignTypeUnitCount = 0;
+ mergedHdr.Version = 5;
+ mergedHdr.Format = DwarfFormat::DWARF32;
+ mergedHdr.AugmentationStringSize = 0;
+
+ for (size_t i = 0, e = numChunks; i != e; ++i) {
+ DebugNamesInputChunk &inputChunk = inputChunks[i];
+ DebugNamesOutputChunk &outputChunk = outputChunks[i];
+ inputChunk.baseCuOffsetIdx = tmpMergedCuOffsets.size();
+ for (uint32_t cuOffset : outputChunk.compilationUnits)
+ tmpMergedCuOffsets.push_back(outputChunk.sec->outSecOff + cuOffset);
+ for (const DebugNamesSectionData &data : inputChunk.sectionsData) {
+ mergedHdr.CompUnitCount += data.hdr.CompUnitCount;
+ mergedHdr.LocalTypeUnitCount += data.hdr.LocalTypeUnitCount;
+ mergedHdr.ForeignTypeUnitCount += data.hdr.ForeignTypeUnitCount;
+ // Set & check the Augmentation String.
+ if (mergedHdr.AugmentationStringSize == 0) {
+ mergedHdr.AugmentationStringSize = data.hdr.AugmentationStringSize;
+ mergedHdr.AugmentationString = data.hdr.AugmentationString;
+ } else if ((mergedHdr.AugmentationStringSize !=
+ data.hdr.AugmentationStringSize) ||
+ (mergedHdr.AugmentationString !=
+ data.hdr.AugmentationString)) {
+ // There are conflicting augmentation strings, so it's best for the
+ // merged index to not use an augmentation string.
+ mergedHdr.AugmentationString = " ";
+ mergedHdr.AugmentationStringSize = mergedHdr.AugmentationString.size();
+ }
+ }
+ }
+}
+
+template <class ELFT>
+void DebugNamesSection<ELFT>::Abbrev::Profile(FoldingSetNodeID &id) const {
+ id.AddInteger(tag);
+ for (const DWARFDebugNames::AttributeEncoding &attr : attributes) {
+ id.AddInteger(attr.Index);
+ id.AddInteger(attr.Form);
+ }
+}
+
+template <class ELFT>
+std::pair<uint8_t, dwarf::Form> DebugNamesSection<ELFT>::getMergedCuSizeData() {
+ // Once we've merged all the CU offsets into a single list, the original
+ // DWARF form/size (often 1 byte) may be too small to hold indices to all
+ // the offsets. Here we calculate what the right form/size needs to be for
+ // the merged index.
+ uint8_t size;
+ dwarf::Form form;
+ // TODO: Investigate possibly using DIEInteger::BestForm here
+ if (mergedHdr.CompUnitCount > UINT32_MAX) {
+ form = DW_FORM_data8;
+ size = 8;
+ } else if (mergedHdr.CompUnitCount > UINT16_MAX) {
+ form = DW_FORM_data4;
+ size = 4;
+ } else if (mergedHdr.CompUnitCount > UINT8_MAX) {
+ form = DW_FORM_data2;
+ size = 2;
+ } else {
+ form = DW_FORM_data1;
+ size = 1;
+ }
+
+ return {size, form};
+}
+
+template <class ELFT>
+void DebugNamesSection<ELFT>::getMergedAbbrevTable(
+ MutableArrayRef<DebugNamesInputChunk> &inputChunks) {
+ MapVector<uint64_t, Abbrev> abbrevMap;
+ FoldingSet<Abbrev> AbbreviationsSet;
+
+ // Need to determine what size form is needed for the DW_IDX_compile_unit
+ // attributes in the merged index. Will need to update the abbrevs to use
+ // the right form.
+ dwarf::Form compileUnitAttrForm = getMergedCuSizeData().second;
+
+ for (DebugNamesInputChunk &chunk : inputChunks) {
+ for (const DWARFDebugNames::NameIndex &ni : *chunk.debugNamesData) {
+ const auto &abbrevs = ni.getAbbrevs();
+ for (const DWARFDebugNames::Abbrev &abbrev : abbrevs) {
+ // Create canonicalized abbrev.
+ Abbrev newAbbrev;
+ DWARFDebugNames::AttributeEncoding cuOrTuAttr(DW_IDX_compile_unit,
+ compileUnitAttrForm);
+ newAbbrev.code = abbrev.Code;
+ newAbbrev.tag = abbrev.Tag;
+ for (const DWARFDebugNames::AttributeEncoding attr :
+ abbrev.Attributes) {
+ DWARFDebugNames::AttributeEncoding newAttr(attr.Index, attr.Form);
+ if (attr.Index == DW_IDX_compile_unit)
+ // Save it, to put it at the end.
+ cuOrTuAttr.Index = newAttr.Index;
+ else
+ newAbbrev.attributes.push_back(newAttr);
+ }
+ // Put the CU/TU index at the end of the attributes list.
+ newAbbrev.attributes.push_back(cuOrTuAttr);
+
+ // Next, check our abbreviations set to see if we've already seen the
+ // identical abbreviation.
+ uint32_t oldCode = newAbbrev.code;
+ uint32_t newCode;
+ FoldingSetNodeID id;
+ newAbbrev.Profile(id);
+ void *insertPos;
+ if (Abbrev *Existing =
+ AbbreviationsSet.FindNodeOrInsertPos(id, insertPos)) {
+ // Found it; we've already seen an identical abbreviation.
+ newCode = Existing->code;
+ } else {
+ // Didn't find it.
+ Abbrev *newAbbrev2 =
+ new (abbrevAlloc.Allocate()) Abbrev(std::move(newAbbrev));
+ AbbreviationsSet.InsertNode(newAbbrev2, insertPos);
+ newCode = mergedAbbrevTable.size() + 1;
+ newAbbrev2->code = newCode;
+ mergedAbbrevTable.push_back(newAbbrev2);
+ }
+ chunk.abbrevCodeMap[oldCode] = newCode;
+ }
+ }
+ }
+
+ // Calculate the merged abbrev table size.
+ mergedHdr.AbbrevTableSize = 0;
+ for (Abbrev *a : mergedAbbrevTable) {
+ mergedHdr.AbbrevTableSize += getULEB128Size(a->code);
+ mergedHdr.AbbrevTableSize += getULEB128Size(a->tag);
+ for (const DWARFDebugNames::AttributeEncoding &attr : a->attributes) {
+ mergedHdr.AbbrevTableSize += getULEB128Size(attr.Index);
+ mergedHdr.AbbrevTableSize += getULEB128Size(attr.Form);
+ }
+ mergedHdr.AbbrevTableSize += 2; // attribute index & form sentinels
+ }
+ ++mergedHdr.AbbrevTableSize; // abbrev table sentinel
+}
+
+template <class ELFT>
+void DebugNamesSection<ELFT>::getMergedSymbols(
+ MutableArrayRef<DebugNamesInputChunk> &inputChunks) {
+ // The number of symbols (& abbrevs) we will handle is very large; will use
+ // multi-threading to speed it up.
+ constexpr size_t numShards = 32;
+ const size_t concurrency =
+ bit_floor(std::min<size_t>(config->threadCount, numShards));
+ const size_t shift = 32 - countr_zero(numShards);
+
+ struct ShardData {
+ // Map to uniquify symbols by name
+ MapVector<CachedHashStringRef, NamedEntry> nameMap;
+ };
+
+ // Need to determine what size is needed for the DW_IDX_compile_unit
+ // attributes in the merged index. Will need to update the indexEntries
+ // to use the right size.
+ uint8_t compileUnitAttrSize = getMergedCuSizeData().first;
+
+ auto shardsPtr = std::make_unique<ShardData[]>(numShards);
+ MutableArrayRef<ShardData> shards(shardsPtr.get(), numShards);
+
+ parallelFor(0, concurrency, [&](size_t threadId) {
+ for (size_t i = 0, e = numChunks; i != e; ++i) {
+ DebugNamesInputChunk &chunk = inputChunks[i];
+ for (DebugNamesSectionData &secData : chunk.sectionsData) {
+ // Deduplicate the NamedEntry records (based on the string/name),
+ // using a map from string/name to NamedEntry records.
+ // Note there is a twist: If there is already a record for the current
+ // 'string' in the nameMap, we append all the indexEntries from the
+ // current record to the record that's in the nameMap. I.e. we
+ // deduplicate the *strings* but we keep all the IndexEntry records
+ // (moving them to the appropriate 'kept' NamedEntry record).
+ for (NamedEntry &stringEntry : secData.namedEntries) {
+ size_t shardId = stringEntry.hashValue >> shift;
+ if ((shardId & (concurrency - 1)) != threadId)
+ continue;
+
+ auto &shard = shards[shardId];
+ stringEntry.chunkIdx = i;
+ for (std::unique_ptr<IndexEntry> &entry : stringEntry.indexEntries) {
+ // The DW_IDX_compile_unit is always the last attribute (we set it
+ // up that way when we read/created the attributes). We need to
+ // update the index value to use the correct merged offset, and we
+ // need to fix the size of the index attribute.
+ uint8_t endPos = entry->attrValues.size() - 1;
+ entry->attrValues[endPos].attrValue += chunk.baseCuOffsetIdx;
+ entry->attrValues[endPos].attrSize = compileUnitAttrSize;
+ // Update the entry's abbrev code to match the merged
+ // abbreviations.
+ entry->abbrevCode = chunk.abbrevCodeMap[entry->abbrevCode];
+ }
+
+ CachedHashStringRef cachedHashName(stringEntry.name);
+ auto [it, inserted] =
+ shard.nameMap.try_emplace(cachedHashName, std::move(stringEntry));
+ if (!inserted) {
+ // Found the string already; don't add to map, but append
+ // entry/entries for it to existing map entry.
+ NamedEntry &found = it->second;
+ // Append the entries...
+ for (auto &entry : stringEntry.indexEntries)
+ found.indexEntries.push_back(std::move(entry));
+ }
+ }
+ }
+ }
+ });
+
+ // Combined the shared symbols into mergedEntries
+ for (ShardData &shard : shards)
+ for (auto &mapEntry : shard.nameMap)
+ mergedEntries.push_back(std::move(mapEntry.second));
+ mergedHdr.NameCount = mergedEntries.size();
+}
+
+template <class ELFT>
+void DebugNamesSection<ELFT>::computeUniqueHashes(
+ MutableArrayRef<DebugNamesInputChunk> &chunks) {
+ SmallVector<uint32_t, 0> uniques;
+ for (const auto &chunk : chunks)
+ uniques.append(chunk.hashValues);
+ mergedHdr.BucketCount = dwarf::getDebugNamesBucketAndHashCount(uniques).first;
+}
+
+template <class ELFT> void DebugNamesSection<ELFT>::generateBuckets() {
+ bucketList.resize(mergedHdr.BucketCount);
----------------
MaskRay wrote:
`bucketList` can be postponed to writeTo to avoid an expensive member variable that consumes a lot of memory.
Fixed in my branch.
https://github.com/llvm/llvm-project/pull/86508
More information about the llvm-commits
mailing list