[llvm] [BOLT][DWARF] Add support for .debug_names (PR #81062)
Amir Ayupov via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 22 13:34:47 PST 2024
================
@@ -0,0 +1,613 @@
+//===- bolt/Rewrite/DebugNames.cpp -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Core/DebugNames.h"
+#include "bolt/Core/BinaryContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
+#include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/LEB128.h"
+
+namespace llvm {
+namespace bolt {
+DWARF5AcceleratorTable::DWARF5AcceleratorTable(
+ const bool CreateDebugNames, BinaryContext &BC,
+ DebugStrWriter &MainBinaryStrWriter)
+ : MainBinaryStrWriter(MainBinaryStrWriter) {
+ NeedToCreate = CreateDebugNames || BC.getDebugNamesSection(); // Requested by the caller, or getDebugNamesSection() is truthy.
+ if (!NeedToCreate) // Nothing to build: leave all buffers and streams unallocated.
+ return;
+ FullTableBuffer = std::make_unique<DebugStrBufferVector>();
+ FullTableStream = std::make_unique<raw_svector_ostream>(*FullTableBuffer);
+ StrBuffer = std::make_unique<DebugStrBufferVector>();
+ StrStream = std::make_unique<raw_svector_ostream>(*StrBuffer);
+ EntriesBuffer = std::make_unique<DebugStrBufferVector>();
+ Entriestream = std::make_unique<raw_svector_ostream>(*EntriesBuffer);
+ AugStringBuffer = std::make_unique<DebugStrBufferVector>();
+ AugStringtream = std::make_unique<raw_svector_ostream>(*AugStringBuffer);
+
+ // Binary has split-dwarf CUs.
+ // Even though for non-skeleton CUs all names are in the .debug_str.dwo
+ // section, for the .debug_names contributions they are in the .debug_str section.
+ if (BC.getNumDWOCUs()) {
+ DataExtractor StrData(BC.DwCtx->getDWARFObj().getStrSection(),
+ BC.DwCtx->isLittleEndian(), 0);
+ uint64_t Offset = 0; // Read cursor; advanced by getCStr().
+ uint64_t StrOffset = 0; // Start offset of the string currently being read.
+ while (StrData.isValidOffset(Offset)) {
+ Error Err = Error::success();
+ const char *CStr = StrData.getCStr(&Offset, &Err);
+ if (Err) { // Extraction failed: give up on creating the table altogether.
+ NeedToCreate = false;
+ errs() << "BOLT-WARNING: [internal-dwarf-error]: Could not extract "
+ "string from .debug_str section at offset: "
+ << Twine::utohexstr(StrOffset) << ".\n";
+ return;
+ }
+ auto R = StrCacheToOffsetMap.try_emplace(
+ llvm::hash_value(llvm::StringRef(CStr)), StrOffset); // Keyed by the string's hash, not by the string itself.
+ if (!R.second) // Hash already present: the earlier offset is kept and only a warning is printed.
+ errs() << "BOLT-WARNING: [internal-dwarf-error]: collision occured on "
+ << CStr << " at offset : 0x" << Twine::utohexstr(StrOffset)
+ << ". Previous string offset is: 0x"
+ << Twine::utohexstr(R.first->second) << ".\n";
+ StrOffset = Offset; // The next string starts where this one ended.
+ }
+ }
+}
+
+void DWARF5AcceleratorTable::setCurrentUnit(DWARFUnit &Unit,
+ const uint64_t UnitStartOffset) {
+ CurrentUnit = nullptr; // Reset so addAccelTableEntry() sees the next unit as new and calls addUnit().
+ CurrentUnitOffset = UnitStartOffset;
+ std::optional<uint64_t> DWOID = Unit.getDWOId();
+ // Skeleton CUs are processed after the DWO units that belong to them.
+ // Patch the placeholder offset in the CU list with the correct one.
+ if (!Unit.isDWOUnit() && DWOID) {
+ auto Iter = CUOffsetsToPatch.find(*DWOID);
+ // Check in case no entries were added from the non-skeleton DWO section.
+ if (Iter != CUOffsetsToPatch.end())
+ CUList[Iter->second] = UnitStartOffset; // Iter->second is the CUList index recorded by addUnit().
+ }
+}
+
+/// Registers \p Unit in the CU, local TU or foreign TU list.
+///
+/// Units that have a \p DWOID are processed before their skeleton CU, so a
+/// placeholder offset is stored in CUList and its index is remembered in
+/// CUOffsetsToPatch; setCurrentUnit() overwrites the placeholder once the
+/// skeleton CU offset is known.
+void DWARF5AcceleratorTable::addUnit(DWARFUnit &Unit,
+ const std::optional<uint64_t> &DWOID) {
+ StrOffsetsWriter.clear();
+ StrOffsetsWriter.initialize(Unit);
+ StrSection = Unit.getStringSection();
+ if (Unit.isTypeUnit()) {
+ if (DWOID) {
+ // We are adding an entry for a DWO TU. The DWO CU might not have any
+ // entries, so need to add it to the list pre-emptively.
+ auto Iter = CUOffsetsToPatch.insert({*DWOID, CUList.size()});
+ if (Iter.second)
+ CUList.push_back(0xBADBAD);
+ // isTypeUnit() was checked above and &Unit can never be null, so use
+ // cast<> rather than cast_or_null<>.
+ ForeignTUList.push_back(
+ cast<DWARFTypeUnit>(&Unit)->getTypeHash());
+ } else {
+ LocalTUList.push_back(CurrentUnitOffset);
+ }
+ } else {
+ if (DWOID) {
+ // This is a path for split dwarf without type units.
+ // We process DWO Units before Skeleton CU. So at this point we don't know
+ // the offset of Skeleton CU. Adding CUList index to a map to patch later
+ // with the correct offset.
+ auto Iter = CUOffsetsToPatch.insert({*DWOID, CUList.size()});
+ if (Iter.second)
+ CUList.push_back(0xBADBAD);
+ } else {
+ CUList.push_back(CurrentUnitOffset);
+ }
+ }
+}
+
+// Returns true if DW_TAG_variable should be included in .debug_names based on
+// section 6.1.1.1 of the DWARF5 spec.
+static bool shouldIncludeVariable(const DWARFUnit &Unit, const DIE &Die) {
+ if (Die.findAttribute(dwarf::Attribute::DW_AT_declaration)) // Declarations are never indexed.
+ return false;
+ const DIEValue LocAttrInfo =
+ Die.findAttribute(dwarf::Attribute::DW_AT_location);
+ if (!LocAttrInfo) // No DW_AT_location: not indexed.
+ return false;
+ if (!(doesFormBelongToClass(LocAttrInfo.getForm(), DWARFFormValue::FC_Exprloc,
+ Unit.getVersion()) ||
+ doesFormBelongToClass(LocAttrInfo.getForm(), DWARFFormValue::FC_Block,
+ Unit.getVersion())))
+ return false; // Only exprloc- and block-class location forms are inspected.
+ std::vector<uint8_t> Sblock; // Raw bytes of the location expression.
+ auto constructVect =
+ [&](const DIEValueList::const_value_range &Iter) -> void {
+ for (const DIEValue &Val : Iter)
+ Sblock.push_back(Val.getDIEInteger().getValue()); // Each value is appended as a single byte.
+ };
+ if (doesFormBelongToClass(LocAttrInfo.getForm(), DWARFFormValue::FC_Exprloc,
+ Unit.getVersion()))
+ constructVect(LocAttrInfo.getDIELoc().values());
+ else
+ constructVect(LocAttrInfo.getDIEBlock().values());
+ ArrayRef<uint8_t> Expr = ArrayRef<uint8_t>(Sblock);
+ DataExtractor Data(StringRef((const char *)Expr.data(), Expr.size()),
+ Unit.getContext().isLittleEndian(), 0);
+ DWARFExpression LocExpr(Data, Unit.getAddressByteSize(),
+ Unit.getFormParams().Format);
+ for (const DWARFExpression::Operation &Expr : LocExpr) // Note: this Expr shadows the ArrayRef declared above.
+ if (Expr.getCode() == dwarf::DW_OP_addrx ||
+ Expr.getCode() == dwarf::DW_OP_form_tls_address)
+ return true; // Included as soon as one of these two opcodes is found.
+ return false;
+}
+/// Adds .debug_names entries for \p Die: one for DW_AT_name and one for
+/// DW_AT_linkage_name, whenever the corresponding name can be resolved.
+///
+/// Does nothing when the table is not being created, when \p Unit is older
+/// than DWARF5, or when the tag/attributes of \p Die do not qualify it for the
+/// index. The first qualifying DIE of a unit also registers the unit itself
+/// through addUnit(), so units without entries are never listed.
+///
+/// \param Unit  unit that owns \p Die.
+/// \param Die   DIE to index.
+/// \param DWOID DWO ID, set when \p Unit is handled as a split-DWARF unit.
+void DWARF5AcceleratorTable::addAccelTableEntry(
+ DWARFUnit &Unit, const DIE &Die, const std::optional<uint64_t> &DWOID) {
+ if (Unit.getVersion() < 5 || !NeedToCreate)
+ return;
+ // Synthetic name for DIEs that have no DW_AT_name (set by canProcess()).
+ std::string NameToUse;
+ auto canProcess = [&](const DIE &Die) -> bool {
+ switch (Die.getTag()) {
+ case dwarf::DW_TAG_base_type:
+ case dwarf::DW_TAG_class_type:
+ case dwarf::DW_TAG_enumeration_type:
+ case dwarf::DW_TAG_imported_declaration:
+ case dwarf::DW_TAG_pointer_type:
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_typedef:
+ case dwarf::DW_TAG_unspecified_type:
+ if (Die.findAttribute(dwarf::Attribute::DW_AT_name))
+ return true;
+ return false;
+ case dwarf::DW_TAG_namespace:
+ // According to the DWARF5 spec, namespaces without DW_AT_name need to
+ // have "(anonymous namespace)".
+ if (!Die.findAttribute(dwarf::Attribute::DW_AT_name))
+ NameToUse = "(anonymous namespace)";
+ return true;
+ case dwarf::DW_TAG_inlined_subroutine:
+ case dwarf::DW_TAG_label:
+ case dwarf::DW_TAG_subprogram:
+ if (Die.findAttribute(dwarf::Attribute::DW_AT_low_pc) ||
+ Die.findAttribute(dwarf::Attribute::DW_AT_high_pc) ||
+ Die.findAttribute(dwarf::Attribute::DW_AT_ranges) ||
+ Die.findAttribute(dwarf::Attribute::DW_AT_entry_pc))
+ return true;
+ return false;
+ case dwarf::DW_TAG_variable:
+ return shouldIncludeVariable(Unit, Die);
+ default:
+ break;
+ }
+ return false;
+ };
+
+ // Returns the index of the most recently added unit in the list that
+ // matches the unit kind, and reports the kind and the DIE tag.
+ auto getUnitID = [&](const DWARFUnit &Unit, bool &IsTU,
+ uint32_t &DieTag) -> uint32_t {
+ IsTU = Unit.isTypeUnit();
+ DieTag = Die.getTag();
+ if (IsTU) {
+ if (DWOID)
+ return ForeignTUList.size() - 1;
+ return LocalTUList.size() - 1;
+ }
+ return CUList.size() - 1;
+ };
+
+ if (!canProcess(Die))
+ return;
+
+ // Adds a Unit to either CU, LocalTU or ForeignTU list the first time we
+ // encounter it.
+ // Invoking it here so that we don't add Units that don't have any entries.
+ if (&Unit != CurrentUnit) {
+ CurrentUnit = &Unit;
+ addUnit(Unit, DWOID);
+ }
+
+ auto addEntry = [&](DIEValue ValName) -> void {
+ if ((!ValName || ValName.getForm() == dwarf::DW_FORM_string) &&
+ NameToUse.empty())
+ return;
+ std::string Name;
+ uint64_t NameIndexOffset = 0;
+ if (NameToUse.empty()) {
+ NameIndexOffset = ValName.getDIEInteger().getValue();
+ if (ValName.getForm() != dwarf::DW_FORM_strp)
+ NameIndexOffset = StrOffsetsWriter.getOffset(NameIndexOffset);
+ // Counts on strings end with '\0'.
+ Name = std::string(&StrSection.data()[NameIndexOffset]);
+ } else {
+ Name = NameToUse;
+ }
+ auto &It = Entries[Name];
+ if (It.Values.empty()) {
+ if (DWOID && NameToUse.empty()) {
+ // For DWO Unit the offset is in the .debug_str.dwo section.
+ // Need to find offset for the name in the .debug_str section.
+ llvm::hash_code Hash = llvm::hash_value(llvm::StringRef(Name));
+ auto ItCache = StrCacheToOffsetMap.find(Hash);
+ if (ItCache == StrCacheToOffsetMap.end())
+ NameIndexOffset = MainBinaryStrWriter.addString(Name);
+ else
+ NameIndexOffset = ItCache->second;
+ }
+ if (!NameToUse.empty())
+ NameIndexOffset = MainBinaryStrWriter.addString(Name);
+ It.StrOffset = NameIndexOffset;
+ // This is the same hash function used in DWARF5AccelTableData.
+ It.HashValue = caseFoldingDjbHash(Name);
+ }
+
+ bool IsTU = false;
+ uint32_t DieTag = 0;
+ uint32_t UnitID = getUnitID(Unit, IsTU, DieTag);
+ std::optional<unsigned> SecondIndex = std::nullopt;
+ if (IsTU && DWOID) {
+ auto Iter = CUOffsetsToPatch.find(*DWOID);
+ // Only read through the iterator when the lookup succeeded;
+ // dereferencing end() is undefined behavior. On failure the entry is
+ // still emitted, just without a second (CU) index.
+ if (Iter != CUOffsetsToPatch.end())
+ SecondIndex = Iter->second;
+ else
+ errs() << "BOLT-WARNING: [internal-dwarf-warning]: Could not find "
+ "DWO ID in CU offsets for second Unit Index "
+ << Name << ". For DIE at offset: "
+ << Twine::utohexstr(CurrentUnitOffset + Die.getOffset()) << ".\n";
+ }
+ It.Values.push_back(new (Allocator) BOLTDWARF5AccelTableData(
+ Die.getOffset(), std::nullopt, DieTag, UnitID, IsTU, SecondIndex));
+ };
+
+ addEntry(Die.findAttribute(dwarf::Attribute::DW_AT_name));
+ addEntry(Die.findAttribute(dwarf::Attribute::DW_AT_linkage_name));
+}
+
+/// Computes UniqueHashCount and BucketCount from the hash values stored in Entries. Algorithm from llvm implementation.
+void DWARF5AcceleratorTable::computeBucketCount() {
+ // First get the number of unique hashes.
+ std::vector<uint32_t> Uniques;
+ Uniques.reserve(Entries.size());
+ for (const auto &E : Entries)
+ Uniques.push_back(E.second.HashValue);
+ array_pod_sort(Uniques.begin(), Uniques.end()); // Sort so that std::unique can collapse equal hashes.
+ std::vector<uint32_t>::iterator P =
+ std::unique(Uniques.begin(), Uniques.end());
+
+ UniqueHashCount = std::distance(Uniques.begin(), P);
+
+ if (UniqueHashCount > 1024) // More than 1024 unique hashes: one bucket per four hashes.
+ BucketCount = UniqueHashCount / 4;
+ else if (UniqueHashCount > 16) // 17..1024 unique hashes: one bucket per two hashes.
+ BucketCount = UniqueHashCount / 2;
+ else // 16 or fewer: one bucket per hash, but never fewer than one bucket.
+ BucketCount = std::max<uint32_t>(UniqueHashCount, 1);
+}
+
+/// Distributes Entries into hash buckets, sorts them, and selects the CU/TU index forms. Bucket code as in: AccelTableBase::finalize()
+void DWARF5AcceleratorTable::finalize() {
+ if (!NeedToCreate)
+ return;
+ // Figure out how many buckets we need, then compute the bucket contents and
+ // the final ordering. The hashes and offsets can be emitted by walking these
+ // data structures.
+ computeBucketCount();
+
+ // Compute bucket contents and final ordering.
+ Buckets.resize(BucketCount);
+ for (auto &E : Entries) {
+ uint32_t Bucket = E.second.HashValue % BucketCount; // BucketCount is at least 1 (see computeBucketCount()).
+ Buckets[Bucket].push_back(&E.second);
+ }
+
+ // Sort the contents of the buckets by hash value so that hash collisions end
+ // up together. Stable sort makes testing easier and doesn't cost much more.
+ for (HashList &Bucket : Buckets) {
+ llvm::stable_sort(Bucket, [](const HashData *LHS, const HashData *RHS) {
+ return LHS->HashValue < RHS->HashValue;
+ });
+ for (HashData *H : Bucket) // Within each name, order the entries by DIE offset.
+ llvm::stable_sort(H->Values, [](const BOLTDWARF5AccelTableData *LHS,
+ const BOLTDWARF5AccelTableData *RHS) {
+ return LHS->getDieOffset() < RHS->getDieOffset();
+ });
+ }
+
+ CUIndexForm = DIEInteger::BestForm(/*IsSigned*/ false, CUList.size() - 1); // NOTE(review): size() - 1 wraps around when CUList is empty - confirm this is intended.
+ TUIndexForm = DIEInteger::BestForm(
+ /*IsSigned*/ false, LocalTUList.size() + ForeignTUList.size() - 1); // NOTE(review): same wrap-around when there are no type units - confirm.
+ const dwarf::FormParams FormParams{5, 4, dwarf::DwarfFormat::DWARF32, false};
+ CUIndexEncodingSize = *dwarf::getFixedFormByteSize(CUIndexForm, FormParams); // Dereferences the optional unchecked; presumably BestForm only yields fixed-size forms - TODO confirm.
+ TUIndexEncodingSize = *dwarf::getFixedFormByteSize(TUIndexForm, FormParams);
+}
+
+std::optional<DWARF5AccelTable::UnitIndexAndEncoding>
+DWARF5AcceleratorTable::getIndexForEntry(
+ const BOLTDWARF5AccelTableData &Value) const {
+ if (Value.isTU()) // Type-unit entries always get a DW_IDX_type_unit index.
+ return {{Value.getUnitID(), {dwarf::DW_IDX_type_unit, TUIndexForm}}};
+ if (CUList.size() > 1) // A CU index is only produced when there is more than one CU.
+ return {{Value.getUnitID(), {dwarf::DW_IDX_compile_unit, CUIndexForm}}};
+ return std::nullopt; // At most one CU: no unit index attribute for this entry.
+}
+
+std::optional<DWARF5AccelTable::UnitIndexAndEncoding>
+DWARF5AcceleratorTable::getSecondIndexForEntry(
+ const BOLTDWARF5AccelTableData &Value) const {
+ if (Value.isTU() && CUList.size() > 1 && Value.getSecondUnitID()) // Only TU entries that recorded a second unit ID, and only with more than one CU.
+ return {
+ {*Value.getSecondUnitID(), {dwarf::DW_IDX_compile_unit, CUIndexForm}}};
+ return std::nullopt; // No additional DW_IDX_compile_unit attribute for this entry.
+}
+
+void DWARF5AcceleratorTable::populateAbbrevsMap() { // Assigns an abbreviation number to every entry, creating abbreviations on demand.
+ for (auto &Bucket : getBuckets()) {
+ for (DWARF5AcceleratorTable::HashData *Hash : Bucket) {
+ for (BOLTDWARF5AccelTableData *Value : Hash->Values) {
+ const std::optional<DWARF5AccelTable::UnitIndexAndEncoding> EntryRet =
+ getIndexForEntry(*Value);
+ // For entries that need to refer to the foreign type units and to
+ // the CU.
+ const std::optional<DWARF5AccelTable::UnitIndexAndEncoding>
+ SecondEntryRet = getSecondIndexForEntry(*Value);
+ DebugNamesAbbrev Abbrev(Value->getDieTag()); // Candidate abbreviation: DIE tag plus the attributes added below.
+ if (EntryRet)
+ Abbrev.addAttribute(EntryRet->Encoding);
+ if (SecondEntryRet)
+ Abbrev.addAttribute(SecondEntryRet->Encoding);
+ Abbrev.addAttribute({dwarf::DW_IDX_die_offset, dwarf::DW_FORM_ref4}); // Every abbreviation carries the DIE offset.
+ FoldingSetNodeID ID;
+ Abbrev.Profile(ID);
+ void *InsertPos;
+ if (DebugNamesAbbrev *Existing =
+ AbbreviationsSet.FindNodeOrInsertPos(ID, InsertPos)) {
+ Value->setAbbrevNumber(Existing->getNumber()); // An identical abbreviation already exists: reuse its number.
+ continue;
+ }
+ DebugNamesAbbrev *NewAbbrev =
+ new (Alloc) DebugNamesAbbrev(std::move(Abbrev));
+ AbbreviationsVector.push_back(NewAbbrev);
+ NewAbbrev->setNumber(AbbreviationsVector.size()); // Numbers are 1-based: the size after the push_back.
+ AbbreviationsSet.InsertNode(NewAbbrev, InsertPos);
+ Value->setAbbrevNumber(NewAbbrev->getNumber());
+ }
+ }
+ }
+}
+
+void DWARF5AcceleratorTable::writeEntry(const BOLTDWARF5AccelTableData &Entry) {
+ const std::optional<DWARF5AccelTable::UnitIndexAndEncoding> EntryRet =
+ getIndexForEntry(Entry);
+ // For foreign type units (FTU) that need to refer to the FTU and to the CU.
+ const std::optional<DWARF5AccelTable::UnitIndexAndEncoding> SecondEntryRet =
+ getSecondIndexForEntry(Entry);
+ const unsigned AbbrevIndex = Entry.getAbbrevNumber() - 1;
+ assert(AbbrevIndex < AbbreviationsVector.size() &&
+ "Entry abbrev index is outside of abbreviations vector range.");
+ const DebugNamesAbbrev *Abbrev = AbbreviationsVector[AbbrevIndex];
+ encodeULEB128(Entry.getAbbrevNumber(), *Entriestream);
+ auto writeIndex = [&](uint32_t Index, uint32_t IndexSize) -> void {
+ switch (IndexSize) {
+ default:
+ llvm_unreachable("Unsupported Index Size!");
+ break;
+ case sizeof(uint8_t):
----------------
aaupov wrote:
```suggestion
case 8:
```
https://github.com/llvm/llvm-project/pull/81062
More information about the llvm-commits
mailing list