[llvm] [BOLT][DWARF] Add support for .debug_names (PR #81062)

Thu Feb 22 13:34:47 PST 2024

================
@@ -0,0 +1,613 @@
+//===- bolt/Rewrite/DebugNames.cpp -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Core/DebugNames.h"
+#include "bolt/Core/BinaryContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
+#include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/LEB128.h"
+
+namespace llvm {
+namespace bolt {
+/// Prepares the state needed to build a .debug_names section.
+///
+/// The table is built when \p CreateDebugNames is set or when \p BC reports an
+/// existing .debug_names section; otherwise the object stays inert.
+/// \p MainBinaryStrWriter is stored so names can later be added to the main
+/// binary's string section.
+///
+/// When the binary has DWO CUs, the .debug_str section is scanned once and a
+/// map from string hash to string offset is built, so that names found in DWO
+/// units can be matched with strings already present in .debug_str.
+DWARF5AcceleratorTable::DWARF5AcceleratorTable(
+    const bool CreateDebugNames, BinaryContext &BC,
+    DebugStrWriter &MainBinaryStrWriter)
+    : MainBinaryStrWriter(MainBinaryStrWriter) {
+  NeedToCreate = CreateDebugNames || BC.getDebugNamesSection();
+  if (!NeedToCreate)
+    return;
+  FullTableBuffer = std::make_unique<DebugStrBufferVector>();
+  FullTableStream = std::make_unique<raw_svector_ostream>(*FullTableBuffer);
+  StrBuffer = std::make_unique<DebugStrBufferVector>();
+  StrStream = std::make_unique<raw_svector_ostream>(*StrBuffer);
+  EntriesBuffer = std::make_unique<DebugStrBufferVector>();
+  Entriestream = std::make_unique<raw_svector_ostream>(*EntriesBuffer);
+  AugStringBuffer = std::make_unique<DebugStrBufferVector>();
+  AugStringtream = std::make_unique<raw_svector_ostream>(*AugStringBuffer);
+
+  // Binary has split-dwarf CUs.
+  // Even though for non-skeleton-cu all names are in .debug_str.dwo section,
+  // for the .debug_names contributions they are in .debug_str section.
+  if (BC.getNumDWOCUs()) {
+    DataExtractor StrData(BC.DwCtx->getDWARFObj().getStrSection(),
+                          BC.DwCtx->isLittleEndian(), 0);
+    uint64_t Offset = 0;
+    uint64_t StrOffset = 0;
+    while (StrData.isValidOffset(Offset)) {
+      Error Err = Error::success();
+      const char *CStr = StrData.getCStr(&Offset, &Err);
+      if (Err) {
+        // Testing a failed Error does not mark it as handled; it has to be
+        // consumed explicitly before it goes out of scope.
+        consumeError(std::move(Err));
+        NeedToCreate = false;
+        errs() << "BOLT-WARNING: [internal-dwarf-error]: Could not extract "
+                  "string from .debug_str section at offset: "
+               << Twine::utohexstr(StrOffset) << ".\n";
+        return;
+      }
+      // The map is keyed by hash only, so two different strings with the same
+      // hash cannot both be stored; the first one wins and a warning is
+      // printed for the other.
+      auto R = StrCacheToOffsetMap.try_emplace(
+          llvm::hash_value(llvm::StringRef(CStr)), StrOffset);
+      if (!R.second)
+        errs() << "BOLT-WARNING: [internal-dwarf-error]: collision occured on "
+               << CStr << " at offset : 0x" << Twine::utohexstr(StrOffset)
+               << ". Previous string offset is: 0x"
+               << Twine::utohexstr(R.first->second) << ".\n";
+      // getCStr advanced Offset past the terminator, i.e. to the next string.
+      StrOffset = Offset;
+    }
+  }
+}
+
+/// Records \p UnitStartOffset as the offset of the unit about to be processed
+/// and resets the cached current-unit pointer.
+///
+/// For a non-DWO unit that carries a DWO ID, the CU list slot that was
+/// reserved under that DWO ID (if any) is overwritten with \p UnitStartOffset.
+void DWARF5AcceleratorTable::setCurrentUnit(DWARFUnit &Unit,
+                                            const uint64_t UnitStartOffset) {
+  CurrentUnit = nullptr;
+  CurrentUnitOffset = UnitStartOffset;
+
+  // Skeleton CUs are processed after their DWO units, so only they can
+  // resolve a previously reserved slot.
+  const std::optional<uint64_t> SkeletonDWOId = Unit.getDWOId();
+  if (Unit.isDWOUnit() || !SkeletonDWOId)
+    return;
+
+  // No slot exists when the DWO section contributed no entries.
+  const auto PatchIt = CUOffsetsToPatch.find(*SkeletonDWOId);
+  if (PatchIt == CUOffsetsToPatch.end())
+    return;
+  CUList[PatchIt->second] = UnitStartOffset;
+}
+
+/// Registers \p Unit in the CU, local TU or foreign TU list and re-initializes
+/// the string-offsets state used to resolve names for that unit.
+///
+/// \p DWOID is set when the unit comes from a DWO section. In that case the
+/// offset of the owning skeleton CU is not known yet, so a placeholder slot is
+/// reserved in the CU list (once per DWO ID) and its index is remembered in
+/// CUOffsetsToPatch.
+void DWARF5AcceleratorTable::addUnit(DWARFUnit &Unit,
+                                     const std::optional<uint64_t> &DWOID) {
+  StrOffsetsWriter.clear();
+  StrOffsetsWriter.initialize(Unit);
+  StrSection = Unit.getStringSection();
+  if (Unit.isTypeUnit()) {
+    if (DWOID) {
+      // We are adding an entry for a DWO TU. The DWO CU might not have any
+      // entries, so need to add it to the list pre-emptively.
+      auto Iter = CUOffsetsToPatch.insert({*DWOID, CUList.size()});
+      if (Iter.second)
+        CUList.push_back(0xBADBAD);
+      // &Unit can never be null and isTypeUnit() was checked above, so a
+      // plain cast<> is the right tool here.
+      ForeignTUList.push_back(cast<DWARFTypeUnit>(&Unit)->getTypeHash());
+    } else {
+      LocalTUList.push_back(CurrentUnitOffset);
+    }
+  } else {
+    if (DWOID) {
+      // This is a path for split dwarf without type units.
+      // We process DWO Units before Skeleton CU. So at this point we don't know
+      // the offset of Skeleton CU. Adding CUList index to a map to patch later
+      // with the correct offset.
+      auto Iter = CUOffsetsToPatch.insert({*DWOID, CUList.size()});
+      if (Iter.second)
+        CUList.push_back(0xBADBAD);
+    } else {
+      CUList.push_back(CurrentUnitOffset);
+    }
+  }
+}
+
+// Decides whether a DW_TAG_variable DIE gets a .debug_names entry, based on
+// section 6.1.1.1 of the DWARF5 spec.
+// Declarations, and variables without a DW_AT_location of exprloc or block
+// class, are rejected. Otherwise the location expression is decoded and the
+// variable is accepted only if it contains DW_OP_addrx or
+// DW_OP_form_tls_address.
+static bool shouldIncludeVariable(const DWARFUnit &Unit, const DIE &Die) {
+  if (Die.findAttribute(dwarf::Attribute::DW_AT_declaration))
+    return false;
+  const DIEValue LocAttrInfo =
+      Die.findAttribute(dwarf::Attribute::DW_AT_location);
+  if (!LocAttrInfo)
+    return false;
+
+  const bool IsExprloc = doesFormBelongToClass(
+      LocAttrInfo.getForm(), DWARFFormValue::FC_Exprloc, Unit.getVersion());
+  if (!IsExprloc &&
+      !doesFormBelongToClass(LocAttrInfo.getForm(), DWARFFormValue::FC_Block,
+                             Unit.getVersion()))
+    return false;
+
+  // Flatten the per-byte DIE values of the expression into one buffer.
+  const DIEValueList::const_value_range ByteValues =
+      IsExprloc ? LocAttrInfo.getDIELoc().values()
+                : LocAttrInfo.getDIEBlock().values();
+  std::vector<uint8_t> ExprBytes;
+  for (const DIEValue &ByteValue : ByteValues)
+    ExprBytes.push_back(ByteValue.getDIEInteger().getValue());
+
+  DataExtractor Data(
+      StringRef(reinterpret_cast<const char *>(ExprBytes.data()),
+                ExprBytes.size()),
+      Unit.getContext().isLittleEndian(), 0);
+  DWARFExpression LocExpr(Data, Unit.getAddressByteSize(),
+                          Unit.getFormParams().Format);
+  for (const DWARFExpression::Operation &Op : LocExpr) {
+    switch (Op.getCode()) {
+    case dwarf::DW_OP_addrx:
+    case dwarf::DW_OP_form_tls_address:
+      return true;
+    default:
+      break;
+    }
+  }
+  return false;
+}
+/// Adds accelerator-table entries for \p Die, which belongs to \p Unit.
+///
+/// Nothing is done for units older than DWARF5, when the table is not being
+/// created, or when the DIE's tag/attributes do not qualify (see canProcess).
+/// The unit itself is registered lazily, the first time one of its DIEs
+/// qualifies, so units without entries never appear in the unit lists.
+///
+/// Named DIEs get up to two entries: one for DW_AT_name and one for
+/// DW_AT_linkage_name. A namespace without DW_AT_name gets exactly one entry
+/// under "(anonymous namespace)".
+///
+/// \p DWOID is set when \p Unit comes from a DWO section.
+void DWARF5AcceleratorTable::addAccelTableEntry(
+    DWARFUnit &Unit, const DIE &Die, const std::optional<uint64_t> &DWOID) {
+  if (Unit.getVersion() < 5 || !NeedToCreate)
+    return;
+  // Synthetic name; only set for namespaces that have no DW_AT_name.
+  std::string NameToUse = "";
+  auto canProcess = [&](const DIE &Die) -> bool {
+    switch (Die.getTag()) {
+    case dwarf::DW_TAG_base_type:
+    case dwarf::DW_TAG_class_type:
+    case dwarf::DW_TAG_enumeration_type:
+    case dwarf::DW_TAG_imported_declaration:
+    case dwarf::DW_TAG_pointer_type:
+    case dwarf::DW_TAG_structure_type:
+    case dwarf::DW_TAG_typedef:
+    case dwarf::DW_TAG_unspecified_type:
+      if (Die.findAttribute(dwarf::Attribute::DW_AT_name))
+        return true;
+      return false;
+    case dwarf::DW_TAG_namespace:
+      // According to DWARF5 spec namespaces without DW_AT_name needs to have
+      // "(anonymous namespace)"
+      if (!Die.findAttribute(dwarf::Attribute::DW_AT_name))
+        NameToUse = "(anonymous namespace)";
+      return true;
+    case dwarf::DW_TAG_inlined_subroutine:
+    case dwarf::DW_TAG_label:
+    case dwarf::DW_TAG_subprogram:
+      if (Die.findAttribute(dwarf::Attribute::DW_AT_low_pc) ||
+          Die.findAttribute(dwarf::Attribute::DW_AT_high_pc) ||
+          Die.findAttribute(dwarf::Attribute::DW_AT_ranges) ||
+          Die.findAttribute(dwarf::Attribute::DW_AT_entry_pc))
+        return true;
+      return false;
+    case dwarf::DW_TAG_variable:
+      return shouldIncludeVariable(Unit, Die);
+    default:
+      break;
+    }
+    return false;
+  };
+
+  // Returns the index of the most recently added unit in the list that
+  // matches the kind of Unit, and reports the unit kind and DIE tag.
+  auto getUnitID = [&](const DWARFUnit &Unit, bool &IsTU,
+                       uint32_t &DieTag) -> uint32_t {
+    IsTU = Unit.isTypeUnit();
+    DieTag = Die.getTag();
+    if (IsTU) {
+      if (DWOID)
+        return ForeignTUList.size() - 1;
+      return LocalTUList.size() - 1;
+    }
+    return CUList.size() - 1;
+  };
+
+  if (!canProcess(Die))
+    return;
+
+  // Adds a Unit to either CU, LocalTU or ForeignTU list the first time we
+  // encounter it.
+  // Invoking it here so that we don't add Units that don't have any entries.
+  if (&Unit != CurrentUnit) {
+    CurrentUnit = &Unit;
+    addUnit(Unit, DWOID);
+  }
+
+  auto addEntry = [&](DIEValue ValName) -> void {
+    // Without a synthetic name there must be a name attribute, and it must
+    // not be an inline DW_FORM_string.
+    if ((!ValName || ValName.getForm() == dwarf::DW_FORM_string) &&
+        NameToUse.empty())
+      return;
+    std::string Name = "";
+    uint64_t NameIndexOffset = 0;
+    if (NameToUse.empty()) {
+      NameIndexOffset = ValName.getDIEInteger().getValue();
+      // Forms other than DW_FORM_strp hold an index that has to be translated
+      // into a string-section offset.
+      if (ValName.getForm() != dwarf::DW_FORM_strp)
+        NameIndexOffset = StrOffsetsWriter.getOffset(NameIndexOffset);
+      // Counts on strings end with '\0'.
+      Name = std::string(&StrSection.data()[NameIndexOffset]);
+    } else {
+      Name = NameToUse;
+    }
+    auto &It = Entries[Name];
+    // First time this name is seen: settle its string offset and hash.
+    if (It.Values.empty()) {
+      if (DWOID && NameToUse.empty()) {
+        // For DWO Unit the offset is in the .debug_str.dwo section.
+        // Need to find offset for the name in the .debug_str section.
+        llvm::hash_code Hash = llvm::hash_value(llvm::StringRef(Name));
+        auto ItCache = StrCacheToOffsetMap.find(Hash);
+        if (ItCache == StrCacheToOffsetMap.end())
+          NameIndexOffset = MainBinaryStrWriter.addString(Name);
+        else
+          NameIndexOffset = ItCache->second;
+      }
+      if (!NameToUse.empty())
+        NameIndexOffset = MainBinaryStrWriter.addString(Name);
+      It.StrOffset = NameIndexOffset;
+      // This the same hash function used in DWARF5AccelTableData.
+      It.HashValue = caseFoldingDjbHash(Name);
+    }
+
+    bool IsTU = false;
+    uint32_t DieTag = 0;
+    uint32_t UnitID = getUnitID(Unit, IsTU, DieTag);
+    std::optional<unsigned> SecondIndex = std::nullopt;
+    if (IsTU && DWOID) {
+      auto Iter = CUOffsetsToPatch.find(*DWOID);
+      // Only read the iterator when the lookup succeeded; otherwise warn and
+      // leave the second index unset.
+      if (Iter == CUOffsetsToPatch.end())
+        errs() << "BOLT-WARNING: [internal-dwarf-warning]: Could not find "
+                  "DWO ID in CU offsets for second Unit Index "
+               << Name << ". For DIE at offset: "
+               << Twine::utohexstr(CurrentUnitOffset + Die.getOffset())
+               << ".\n";
+      else
+        SecondIndex = Iter->second;
+    }
+    It.Values.push_back(new (Allocator) BOLTDWARF5AccelTableData(
+        Die.getOffset(), std::nullopt, DieTag, UnitID, IsTU, SecondIndex));
+  };
+
+  // With a synthetic name the attribute value is ignored, so calling addEntry
+  // once per attribute would record the same DIE twice under the same name.
+  if (!NameToUse.empty()) {
+    addEntry(DIEValue());
+    return;
+  }
+
+  addEntry(Die.findAttribute(dwarf::Attribute::DW_AT_name));
+  addEntry(Die.findAttribute(dwarf::Attribute::DW_AT_linkage_name));
+}
+
+/// Counts the distinct hash values among all entries and derives the number
+/// of buckets from that count. Algorithm from llvm implementation.
+void DWARF5AcceleratorTable::computeBucketCount() {
+  // Collect every hash, then sort so that duplicates become adjacent.
+  std::vector<uint32_t> Hashes;
+  Hashes.reserve(Entries.size());
+  for (const auto &NameAndData : Entries)
+    Hashes.push_back(NameAndData.second.HashValue);
+  array_pod_sort(Hashes.begin(), Hashes.end());
+
+  UniqueHashCount = std::distance(
+      Hashes.begin(), std::unique(Hashes.begin(), Hashes.end()));
+
+  // Small tables get one bucket per unique hash (at least one bucket);
+  // larger ones get a half or a quarter of that.
+  if (UniqueHashCount <= 16) {
+    BucketCount = std::max<uint32_t>(UniqueHashCount, 1);
+    return;
+  }
+  if (UniqueHashCount <= 1024) {
+    BucketCount = UniqueHashCount / 2;
+    return;
+  }
+  BucketCount = UniqueHashCount / 4;
+}
+
+/// Distributes the collected names into hash buckets, fixes the final
+/// ordering, and selects the forms used to encode CU and TU indices.
+/// Does nothing when the table is not being created.
+/// Bucket code as in: AccelTableBase::finalize()
+void DWARF5AcceleratorTable::finalize() {
+  if (!NeedToCreate)
+    return;
+  // Figure out how many buckets we need, then compute the bucket contents and
+  // the final ordering. The hashes and offsets can be emitted by walking these
+  // data structures.
+  computeBucketCount();
+
+  // Compute bucket contents and final ordering.
+  Buckets.resize(BucketCount);
+  for (auto &E : Entries) {
+    uint32_t Bucket = E.second.HashValue % BucketCount;
+    Buckets[Bucket].push_back(&E.second);
+  }
+
+  // Sort the contents of the buckets by hash value so that hash collisions end
+  // up together. Stable sort makes testing easier and doesn't cost much more.
+  for (HashList &Bucket : Buckets) {
+    llvm::stable_sort(Bucket, [](const HashData *LHS, const HashData *RHS) {
+      return LHS->HashValue < RHS->HashValue;
+    });
+    for (HashData *H : Bucket)
+      llvm::stable_sort(H->Values, [](const BOLTDWARF5AccelTableData *LHS,
+                                      const BOLTDWARF5AccelTableData *RHS) {
+        return LHS->getDieOffset() < RHS->getDieOffset();
+      });
+  }
+
+  // The index form only has to fit the largest index in use. Guard the empty
+  // lists so that "size - 1" does not wrap around and select an 8-byte form.
+  const uint64_t LastCUIndex = CUList.empty() ? 0 : CUList.size() - 1;
+  const uint64_t TUCount = LocalTUList.size() + ForeignTUList.size();
+  const uint64_t LastTUIndex = TUCount == 0 ? 0 : TUCount - 1;
+  CUIndexForm = DIEInteger::BestForm(/*IsSigned*/ false, LastCUIndex);
+  TUIndexForm = DIEInteger::BestForm(/*IsSigned*/ false, LastTUIndex);
+  const dwarf::FormParams FormParams{5, 4, dwarf::DwarfFormat::DWARF32, false};
+  CUIndexEncodingSize = *dwarf::getFixedFormByteSize(CUIndexForm, FormParams);
+  TUIndexEncodingSize = *dwarf::getFixedFormByteSize(TUIndexForm, FormParams);
+}
+
+/// Returns the unit index of \p Value together with the attribute encoding
+/// used to emit it: a type-unit index for TU entries, a compile-unit index for
+/// CU entries. CU entries get no index when the CU list holds at most one
+/// unit.
+std::optional<DWARF5AccelTable::UnitIndexAndEncoding>
+DWARF5AcceleratorTable::getIndexForEntry(
+    const BOLTDWARF5AccelTableData &Value) const {
+  using IndexAndEncoding = DWARF5AccelTable::UnitIndexAndEncoding;
+  if (!Value.isTU()) {
+    if (CUList.size() <= 1)
+      return std::nullopt;
+    return IndexAndEncoding{Value.getUnitID(),
+                            {dwarf::DW_IDX_compile_unit, CUIndexForm}};
+  }
+  return IndexAndEncoding{Value.getUnitID(),
+                          {dwarf::DW_IDX_type_unit, TUIndexForm}};
+}
+
+/// Returns the additional compile-unit index (and its encoding) for a TU entry
+/// that carries a second unit ID. Yields nothing for CU entries, when the CU
+/// list holds at most one unit, or when no second unit ID was recorded.
+std::optional<DWARF5AccelTable::UnitIndexAndEncoding>
+DWARF5AcceleratorTable::getSecondIndexForEntry(
+    const BOLTDWARF5AccelTableData &Value) const {
+  if (!Value.isTU() || CUList.size() <= 1)
+    return std::nullopt;
+  if (!Value.getSecondUnitID())
+    return std::nullopt;
+  using IndexAndEncoding = DWARF5AccelTable::UnitIndexAndEncoding;
+  return IndexAndEncoding{*Value.getSecondUnitID(),
+                          {dwarf::DW_IDX_compile_unit, CUIndexForm}};
+}
+
+/// Assigns an abbreviation number to every entry in every bucket.
+///
+/// An abbreviation is made of the DIE tag, the optional unit index
+/// attribute(s) and a DW_IDX_die_offset attribute. Identical abbreviations are
+/// shared through AbbreviationsSet; new ones are appended to
+/// AbbreviationsVector and numbered by their 1-based position in it.
+void DWARF5AcceleratorTable::populateAbbrevsMap() {
+  for (auto &Bucket : getBuckets()) {
+    for (DWARF5AcceleratorTable::HashData *Hash : Bucket) {
+      for (BOLTDWARF5AccelTableData *Entry : Hash->Values) {
+        // Describe the shape this entry will have when emitted.
+        DebugNamesAbbrev Candidate(Entry->getDieTag());
+        if (const std::optional<DWARF5AccelTable::UnitIndexAndEncoding>
+                UnitIndex = getIndexForEntry(*Entry))
+          Candidate.addAttribute(UnitIndex->Encoding);
+        // For entries that need to refer to the foreign type units and to
+        // the CU.
+        if (const std::optional<DWARF5AccelTable::UnitIndexAndEncoding>
+                SecondUnitIndex = getSecondIndexForEntry(*Entry))
+          Candidate.addAttribute(SecondUnitIndex->Encoding);
+        Candidate.addAttribute({dwarf::DW_IDX_die_offset, dwarf::DW_FORM_ref4});
+
+        // Reuse an identical abbreviation if there is one; otherwise
+        // allocate, number and register a new one.
+        FoldingSetNodeID AbbrevID;
+        Candidate.Profile(AbbrevID);
+        void *InsertPos;
+        DebugNamesAbbrev *Abbrev =
+            AbbreviationsSet.FindNodeOrInsertPos(AbbrevID, InsertPos);
+        if (!Abbrev) {
+          Abbrev = new (Alloc) DebugNamesAbbrev(std::move(Candidate));
+          AbbreviationsVector.push_back(Abbrev);
+          Abbrev->setNumber(AbbreviationsVector.size());
+          AbbreviationsSet.InsertNode(Abbrev, InsertPos);
+        }
+        Entry->setAbbrevNumber(Abbrev->getNumber());
+      }
+    }
+  }
+}
+
+void DWARF5AcceleratorTable::writeEntry(const BOLTDWARF5AccelTableData &Entry) {
+  const std::optional<DWARF5AccelTable::UnitIndexAndEncoding> EntryRet =
+      getIndexForEntry(Entry);
+  // For forgeign type (FTU) units that need to refer to the FTU and to the CU.
+  const std::optional<DWARF5AccelTable::UnitIndexAndEncoding> SecondEntryRet =
+      getSecondIndexForEntry(Entry);
+  const unsigned AbbrevIndex = Entry.getAbbrevNumber() - 1;
+  assert(AbbrevIndex < AbbreviationsVector.size() &&
+         "Entry abbrev index is outside of abbreviations vector range.");
+  const DebugNamesAbbrev *Abbrev = AbbreviationsVector[AbbrevIndex];
+  encodeULEB128(Entry.getAbbrevNumber(), *Entriestream);
+  auto writeIndex = [&](uint32_t Index, uint32_t IndexSize) -> void {
+    switch (IndexSize) {
+    default:
+      llvm_unreachable("Unsupported Index Size!");
+      break;
+    case sizeof(uint8_t):
----------------
aaupov wrote:

```suggestion
    case 8:
```

https://github.com/llvm/llvm-project/pull/81062