[llvm] [BOLT][DWARF][NFC] Refactor updateUnitDebugInfo (PR #100811)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 26 13:46:14 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-bolt
Author: Sayhaan Siddiqui (sayhaan)
<details>
<summary>Changes</summary>
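Refactors `DWARFRewriter::updateUnitDebugInfo` so that the code for each case of its switch on the DIE tag lives in its own member function: `handleCompileUnit`, `handleSubprogram`, `handleLexicalBlock`, `handleCallSite`, and `handleDefaultCase`.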
---
Patch is 39.98 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/100811.diff
2 Files Affected:
- (modified) bolt/include/bolt/Rewrite/DWARFRewriter.h (+26)
- (modified) bolt/lib/Rewrite/DWARFRewriter.cpp (+411-350)
``````````diff
diff --git a/bolt/include/bolt/Rewrite/DWARFRewriter.h b/bolt/include/bolt/Rewrite/DWARFRewriter.h
index b798c5b76fc28..9ff6c2c892fda 100644
--- a/bolt/include/bolt/Rewrite/DWARFRewriter.h
+++ b/bolt/include/bolt/Rewrite/DWARFRewriter.h
@@ -115,6 +115,32 @@ class DWARFRewriter {
DebugAddrWriter &AddressWriter,
std::optional<uint64_t> RangesBase = std::nullopt);
+ void handleCompileUnit(DIE &Die, DWARFUnit &Unit, DIEBuilder &DIEBldr,
+ DebugLocWriter &DebugLocWriter,
+ DebugRangesSectionWriter &RangesSectionWriter,
+ DebugAddrWriter &AddressWriter,
+ std::optional<uint64_t> &RangesBase);
+
+ void
+ handleSubprogram(DIE &Die, DWARFUnit &Unit, DIEBuilder &DIEBldr,
+ DebugRangesSectionWriter &RangesSectionWriter,
+ DebugAddrWriter &AddressWriter,
+ std::map<DebugAddressRangesVector, uint64_t> &CachedRanges);
+
+ void handleLexicalBlock(
+ DIE &Die, DWARFUnit &Unit, DIEBuilder &DIEBldr,
+ DebugRangesSectionWriter &RangesSectionWriter,
+ DebugAddrWriter &AddressWriter,
+ std::map<DebugAddressRangesVector, uint64_t> &CachedRanges);
+
+ void handleCallSite(DIE &Die, DWARFUnit &Unit, DIEBuilder &DIEBldr,
+ DebugAddrWriter &AddressWriter);
+
+ void handleDefaultCase(DIE &Die, DWARFUnit &Unit, DIEBuilder &DIEBldr,
+ DebugLocWriter &DebugLocWriter,
+ DebugRangesSectionWriter &RangesSectionWriter,
+ DebugAddrWriter &AddressWriter);
+
/// Patches the binary for an object's address ranges to be updated.
/// The object can be anything that has associated address ranges via either
/// DW_AT_low/high_pc or DW_AT_ranges (i.e. functions, lexical blocks, etc).
diff --git a/bolt/lib/Rewrite/DWARFRewriter.cpp b/bolt/lib/Rewrite/DWARFRewriter.cpp
index 674b5f17adb3f..17c3dca16d7d2 100644
--- a/bolt/lib/Rewrite/DWARFRewriter.cpp
+++ b/bolt/lib/Rewrite/DWARFRewriter.cpp
@@ -781,7 +781,6 @@ void DWARFRewriter::updateUnitDebugInfo(
DWARFUnit &Unit, DIEBuilder &DIEBldr, DebugLocWriter &DebugLocWriter,
DebugRangesSectionWriter &RangesSectionWriter,
DebugAddrWriter &AddressWriter, std::optional<uint64_t> RangesBase) {
- // Cache debug ranges so that the offset for identical ranges could be reused.
std::map<DebugAddressRangesVector, uint64_t> CachedRanges;
uint64_t DIEOffset = Unit.getOffset() + Unit.getHeaderSize();
@@ -789,7 +788,87 @@ void DWARFRewriter::updateUnitDebugInfo(
const std::vector<std::unique_ptr<DIEBuilder::DIEInfo>> &DIs =
DIEBldr.getDIEsByUnit(Unit);
- // Either updates or normalizes DW_AT_range to DW_AT_low_pc and DW_AT_high_pc.
+ for (const std::unique_ptr<DIEBuilder::DIEInfo> &DI : DIs) {
+ DIE *Die = DI->Die;
+ switch (Die->getTag()) {
+ case dwarf::DW_TAG_compile_unit:
+ case dwarf::DW_TAG_skeleton_unit:
+ handleCompileUnit(*Die, Unit, DIEBldr, DebugLocWriter,
+ RangesSectionWriter, AddressWriter, RangesBase);
+ break;
+ case dwarf::DW_TAG_subprogram:
+ handleSubprogram(*Die, Unit, DIEBldr, RangesSectionWriter, AddressWriter,
+ CachedRanges);
+ break;
+ case dwarf::DW_TAG_lexical_block:
+ case dwarf::DW_TAG_inlined_subroutine:
+ case dwarf::DW_TAG_try_block:
+ case dwarf::DW_TAG_catch_block:
+ handleLexicalBlock(*Die, Unit, DIEBldr, RangesSectionWriter,
+ AddressWriter, CachedRanges);
+ break;
+ case dwarf::DW_TAG_call_site:
+ handleCallSite(*Die, Unit, DIEBldr, AddressWriter);
+ break;
+ default:
+ handleDefaultCase(*Die, Unit, DIEBldr, DebugLocWriter,
+ RangesSectionWriter, AddressWriter);
+ break;
+ }
+ }
+
+ if (DIEOffset > NextCUOffset)
+ errs() << "BOLT-WARNING: corrupt DWARF detected at 0x"
+ << Twine::utohexstr(Unit.getOffset()) << '\n';
+}
+
+void DWARFRewriter::handleCompileUnit(
+ DIE &Die, DWARFUnit &Unit, DIEBuilder &DIEBldr,
+ DebugLocWriter &DebugLocWriter,
+ DebugRangesSectionWriter &RangesSectionWriter,
+ DebugAddrWriter &AddressWriter, std::optional<uint64_t> &RangesBase) {
+ // For dwarf5 section 3.1.3
+ // The following attributes are not part of a split full compilation unit
+ // entry but instead are inherited (if present) from the corresponding
+ // skeleton compilation unit: DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges,
+ // DW_AT_stmt_list, DW_AT_comp_dir, DW_AT_str_offsets_base,
+ // DW_AT_addr_base and DW_AT_rnglists_base.
+ if (Unit.getVersion() == 5 && Unit.isDWOUnit())
+ return;
+ auto ModuleRangesOrError = getDIEAddressRanges(Die, Unit);
+ if (!ModuleRangesOrError) {
+ consumeError(ModuleRangesOrError.takeError());
+ return;
+ }
+ DWARFAddressRangesVector &ModuleRanges = *ModuleRangesOrError;
+ DebugAddressRangesVector OutputRanges =
+ BC.translateModuleAddressRanges(ModuleRanges);
+ DIEValue LowPCAttrInfo = Die.findAttribute(dwarf::DW_AT_low_pc);
+ // For a case where LLD GCs only function used in the CU.
+ // If CU doesn't have DW_AT_low_pc we are not going to convert,
+ // so don't need to do anything.
+ if (OutputRanges.empty() && !Unit.isDWOUnit() && LowPCAttrInfo)
+ OutputRanges.push_back({0, 0});
+ const uint64_t RangesSectionOffset =
+ RangesSectionWriter.addRanges(OutputRanges);
+ // Don't emit the zero low_pc arange.
+ if (!Unit.isDWOUnit() && !OutputRanges.empty() && OutputRanges.back().LowPC)
+ ARangesSectionWriter->addCURanges(Unit.getOffset(),
+ std::move(OutputRanges));
+ updateDWARFObjectAddressRanges(Unit, DIEBldr, Die, RangesSectionOffset,
+ RangesBase);
+ DIEValue StmtListAttrVal = Die.findAttribute(dwarf::DW_AT_stmt_list);
+ if (LineTablePatchMap.count(&Unit))
+ DIEBldr.replaceValue(&Die, dwarf::DW_AT_stmt_list,
+ StmtListAttrVal.getForm(),
+ DIEInteger(LineTablePatchMap[&Unit]));
+}
+
+void DWARFRewriter::handleSubprogram(
+ DIE &Die, DWARFUnit &Unit, DIEBuilder &DIEBldr,
+ DebugRangesSectionWriter &RangesSectionWriter,
+ DebugAddrWriter &AddressWriter,
+ std::map<DebugAddressRangesVector, uint64_t> &CachedRanges) {
auto updateLowPCHighPC = [&](DIE *Die, const DIEValue &LowPCVal,
const DIEValue &HighPCVal, uint64_t LowPC,
const uint64_t HighPC) {
@@ -827,83 +906,36 @@ void DWARFRewriter::updateUnitDebugInfo(
DIEBldr.addValue(Die, AttrHighPC, FormHighPC, DIEInteger(Size));
}
};
-
- for (const std::unique_ptr<DIEBuilder::DIEInfo> &DI : DIs) {
- DIE *Die = DI->Die;
- switch (Die->getTag()) {
- case dwarf::DW_TAG_compile_unit:
- case dwarf::DW_TAG_skeleton_unit: {
- // For dwarf5 section 3.1.3
- // The following attributes are not part of a split full compilation unit
- // entry but instead are inherited (if present) from the corresponding
- // skeleton compilation unit: DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges,
- // DW_AT_stmt_list, DW_AT_comp_dir, DW_AT_str_offsets_base,
- // DW_AT_addr_base and DW_AT_rnglists_base.
- if (Unit.getVersion() == 5 && Unit.isDWOUnit())
- continue;
- auto ModuleRangesOrError = getDIEAddressRanges(*Die, Unit);
- if (!ModuleRangesOrError) {
- consumeError(ModuleRangesOrError.takeError());
- break;
- }
- DWARFAddressRangesVector &ModuleRanges = *ModuleRangesOrError;
- DebugAddressRangesVector OutputRanges =
- BC.translateModuleAddressRanges(ModuleRanges);
- DIEValue LowPCAttrInfo = Die->findAttribute(dwarf::DW_AT_low_pc);
- // For a case where LLD GCs only function used in the CU.
- // If CU doesn't have DW_AT_low_pc we are not going to convert,
- // so don't need to do anything.
- if (OutputRanges.empty() && !Unit.isDWOUnit() && LowPCAttrInfo)
- OutputRanges.push_back({0, 0});
- const uint64_t RangesSectionOffset =
- RangesSectionWriter.addRanges(OutputRanges);
- // Don't emit the zero low_pc arange.
- if (!Unit.isDWOUnit() && !OutputRanges.empty() &&
- OutputRanges.back().LowPC)
- ARangesSectionWriter->addCURanges(Unit.getOffset(),
- std::move(OutputRanges));
- updateDWARFObjectAddressRanges(Unit, DIEBldr, *Die, RangesSectionOffset,
- RangesBase);
- DIEValue StmtListAttrVal = Die->findAttribute(dwarf::DW_AT_stmt_list);
- if (LineTablePatchMap.count(&Unit))
- DIEBldr.replaceValue(Die, dwarf::DW_AT_stmt_list,
- StmtListAttrVal.getForm(),
- DIEInteger(LineTablePatchMap[&Unit]));
- break;
+ // Get function address either from ranges or [LowPC, HighPC) pair.
+ uint64_t Address = UINT64_MAX;
+ uint64_t SectionIndex, HighPC;
+ DebugAddressRangesVector FunctionRanges;
+ if (!getLowAndHighPC(Die, Unit, Address, HighPC, SectionIndex)) {
+ Expected<DWARFAddressRangesVector> RangesOrError =
+ getDIEAddressRanges(Die, Unit);
+ if (!RangesOrError) {
+ consumeError(RangesOrError.takeError());
+ return;
}
+ DWARFAddressRangesVector Ranges = *RangesOrError;
+ // Not a function definition.
+ if (Ranges.empty())
+ return;
- case dwarf::DW_TAG_subprogram: {
- // Get function address either from ranges or [LowPC, HighPC) pair.
- uint64_t Address = UINT64_MAX;
- uint64_t SectionIndex, HighPC;
- DebugAddressRangesVector FunctionRanges;
- if (!getLowAndHighPC(*Die, Unit, Address, HighPC, SectionIndex)) {
- Expected<DWARFAddressRangesVector> RangesOrError =
- getDIEAddressRanges(*Die, Unit);
- if (!RangesOrError) {
- consumeError(RangesOrError.takeError());
- break;
- }
- DWARFAddressRangesVector Ranges = *RangesOrError;
- // Not a function definition.
- if (Ranges.empty())
- break;
-
- for (const DWARFAddressRange &Range : Ranges) {
- if (const BinaryFunction *Function =
- BC.getBinaryFunctionAtAddress(Range.LowPC))
- FunctionRanges.append(Function->getOutputAddressRanges());
- }
- } else {
- if (const BinaryFunction *Function =
- BC.getBinaryFunctionAtAddress(Address))
- FunctionRanges = Function->getOutputAddressRanges();
- }
+ for (const DWARFAddressRange &Range : Ranges) {
+ if (const BinaryFunction *Function =
+ BC.getBinaryFunctionAtAddress(Range.LowPC))
+ FunctionRanges.append(Function->getOutputAddressRanges());
+ }
+ } else {
+ if (const BinaryFunction *Function = BC.getBinaryFunctionAtAddress(Address))
+ FunctionRanges = Function->getOutputAddressRanges();
+ }
// Clear cached ranges as the new function will have its own set.
CachedRanges.clear();
- DIEValue LowPCVal = Die->findAttribute(dwarf::DW_AT_low_pc);
- DIEValue HighPCVal = Die->findAttribute(dwarf::DW_AT_high_pc);
+ DIEValue LowPCVal = Die.findAttribute(dwarf::DW_AT_low_pc);
+ DIEValue HighPCVal = Die.findAttribute(dwarf::DW_AT_high_pc);
if (FunctionRanges.empty()) {
if (LowPCVal && HighPCVal)
FunctionRanges.push_back({0, HighPCVal.getDIEInteger().getValue()});
@@ -912,123 +944,163 @@ void DWARFRewriter::updateUnitDebugInfo(
}
if (FunctionRanges.size() == 1 && !opts::AlwaysConvertToRanges) {
- updateLowPCHighPC(Die, LowPCVal, HighPCVal, FunctionRanges.back().LowPC,
+ updateLowPCHighPC(&Die, LowPCVal, HighPCVal,
+ FunctionRanges.back().LowPC,
FunctionRanges.back().HighPC);
- break;
+ return;
}
updateDWARFObjectAddressRanges(
- Unit, DIEBldr, *Die, RangesSectionWriter.addRanges(FunctionRanges));
+ Unit, DIEBldr, Die, RangesSectionWriter.addRanges(FunctionRanges));
+}
- break;
+void DWARFRewriter::handleLexicalBlock(
+ DIE &Die, DWARFUnit &Unit, DIEBuilder &DIEBldr,
+ DebugRangesSectionWriter &RangesSectionWriter,
+ DebugAddrWriter &AddressWriter,
+ std::map<DebugAddressRangesVector, uint64_t> &CachedRanges) {
+ auto updateLowPCHighPC = [&](DIE *Die, const DIEValue &LowPCVal,
+ const DIEValue &HighPCVal, uint64_t LowPC,
+ const uint64_t HighPC) {
+ dwarf::Attribute AttrLowPC = dwarf::DW_AT_low_pc;
+ dwarf::Form FormLowPC = dwarf::DW_FORM_addr;
+ dwarf::Attribute AttrHighPC = dwarf::DW_AT_high_pc;
+ dwarf::Form FormHighPC = dwarf::DW_FORM_data4;
+ const uint32_t Size = HighPC - LowPC;
+ // Whatever was generated is not low_pc/high_pc, so will reset to
+ // default for size 1.
+ if (!LowPCVal || !HighPCVal) {
+ if (Unit.getVersion() >= 5)
+ FormLowPC = dwarf::DW_FORM_addrx;
+ else if (Unit.isDWOUnit())
+ FormLowPC = dwarf::DW_FORM_GNU_addr_index;
+ } else {
+ AttrLowPC = LowPCVal.getAttribute();
+ FormLowPC = LowPCVal.getForm();
+ AttrHighPC = HighPCVal.getAttribute();
+ FormHighPC = HighPCVal.getForm();
}
- case dwarf::DW_TAG_lexical_block:
- case dwarf::DW_TAG_inlined_subroutine:
- case dwarf::DW_TAG_try_block:
- case dwarf::DW_TAG_catch_block: {
- uint64_t RangesSectionOffset = 0;
- Expected<DWARFAddressRangesVector> RangesOrError =
- getDIEAddressRanges(*Die, Unit);
- const BinaryFunction *Function =
- RangesOrError && !RangesOrError->empty()
- ? BC.getBinaryFunctionContainingAddress(
- RangesOrError->front().LowPC)
- : nullptr;
- DebugAddressRangesVector OutputRanges;
- if (Function) {
- OutputRanges = translateInputToOutputRanges(*Function, *RangesOrError);
- LLVM_DEBUG(if (OutputRanges.empty() != RangesOrError->empty()) {
- dbgs() << "BOLT-DEBUG: problem with DIE at 0x"
- << Twine::utohexstr(Die->getOffset()) << " in CU at 0x"
- << Twine::utohexstr(Unit.getOffset()) << '\n';
- });
- if (opts::AlwaysConvertToRanges || OutputRanges.size() > 1) {
- RangesSectionOffset = RangesSectionWriter.addRanges(
- std::move(OutputRanges), CachedRanges);
- OutputRanges.clear();
- } else if (OutputRanges.empty()) {
- OutputRanges.push_back({0, RangesOrError.get().front().HighPC});
- }
- } else if (!RangesOrError) {
- consumeError(RangesOrError.takeError());
- } else {
- OutputRanges.push_back({0, !RangesOrError->empty()
- ? RangesOrError.get().front().HighPC
- : 0});
- }
- DIEValue LowPCVal = Die->findAttribute(dwarf::DW_AT_low_pc);
- DIEValue HighPCVal = Die->findAttribute(dwarf::DW_AT_high_pc);
- if (OutputRanges.size() == 1) {
- updateLowPCHighPC(Die, LowPCVal, HighPCVal, OutputRanges.back().LowPC,
- OutputRanges.back().HighPC);
- break;
- }
- updateDWARFObjectAddressRanges(Unit, DIEBldr, *Die, RangesSectionOffset);
- break;
+
+ if (FormLowPC == dwarf::DW_FORM_addrx ||
+ FormLowPC == dwarf::DW_FORM_GNU_addr_index)
+ LowPC = AddressWriter.getIndexFromAddress(LowPC, Unit);
+
+ if (LowPCVal)
+ DIEBldr.replaceValue(Die, AttrLowPC, FormLowPC, DIEInteger(LowPC));
+ else
+ DIEBldr.addValue(Die, AttrLowPC, FormLowPC, DIEInteger(LowPC));
+ if (HighPCVal) {
+ DIEBldr.replaceValue(Die, AttrHighPC, FormHighPC, DIEInteger(Size));
+ } else {
+ DIEBldr.deleteValue(Die, dwarf::DW_AT_ranges);
+ DIEBldr.addValue(Die, AttrHighPC, FormHighPC, DIEInteger(Size));
}
- case dwarf::DW_TAG_call_site: {
- auto patchPC = [&](DIE *Die, DIEValue &AttrVal, StringRef Entry) -> void {
- std::optional<uint64_t> Address = getAsAddress(Unit, AttrVal);
- const BinaryFunction *Function =
- BC.getBinaryFunctionContainingAddress(*Address);
- uint64_t UpdatedAddress = *Address;
- if (Function)
- UpdatedAddress =
- Function->translateInputToOutputAddress(UpdatedAddress);
-
- if (AttrVal.getForm() == dwarf::DW_FORM_addrx) {
- const uint32_t Index =
- AddressWriter.getIndexFromAddress(UpdatedAddress, Unit);
- DIEBldr.replaceValue(Die, AttrVal.getAttribute(), AttrVal.getForm(),
- DIEInteger(Index));
- } else if (AttrVal.getForm() == dwarf::DW_FORM_addr) {
- DIEBldr.replaceValue(Die, AttrVal.getAttribute(), AttrVal.getForm(),
- DIEInteger(UpdatedAddress));
- } else {
- errs() << "BOLT-ERROR: unsupported form for " << Entry << "\n";
- }
- };
- DIEValue CallPcAttrVal = Die->findAttribute(dwarf::DW_AT_call_pc);
- if (CallPcAttrVal)
- patchPC(Die, CallPcAttrVal, "DW_AT_call_pc");
+ };
- DIEValue CallRetPcAttrVal =
- Die->findAttribute(dwarf::DW_AT_call_return_pc);
- if (CallRetPcAttrVal)
- patchPC(Die, CallRetPcAttrVal, "DW_AT_call_return_pc");
+ uint64_t RangesSectionOffset = 0;
+ Expected<DWARFAddressRangesVector> RangesOrError =
+ getDIEAddressRanges(Die, Unit);
+ const BinaryFunction *Function =
+ RangesOrError && !RangesOrError->empty()
+ ? BC.getBinaryFunctionContainingAddress(RangesOrError->front().LowPC)
+ : nullptr;
+ DebugAddressRangesVector OutputRanges;
+ if (Function) {
+ OutputRanges = translateInputToOutputRanges(*Function, *RangesOrError);
+ LLVM_DEBUG(if (OutputRanges.empty() != RangesOrError->empty()) {
+ dbgs() << "BOLT-DEBUG: problem with DIE at 0x"
+ << Twine::utohexstr(Die.getOffset()) << " in CU at 0x"
+ << Twine::utohexstr(Unit.getOffset()) << '\n';
+ });
+ if (opts::AlwaysConvertToRanges || OutputRanges.size() > 1) {
+ RangesSectionOffset =
+ RangesSectionWriter.addRanges(std::move(OutputRanges), CachedRanges);
+ OutputRanges.clear();
+ } else if (OutputRanges.empty()) {
+ OutputRanges.push_back({0, RangesOrError.get().front().HighPC});
+ }
+ } else if (!RangesOrError) {
+ consumeError(RangesOrError.takeError());
+ } else {
+ OutputRanges.push_back(
+ {0, !RangesOrError->empty() ? RangesOrError.get().front().HighPC : 0});
+ }
+ DIEValue LowPCVal = Die.findAttribute(dwarf::DW_AT_low_pc);
+ DIEValue HighPCVal = Die.findAttribute(dwarf::DW_AT_high_pc);
+ if (OutputRanges.size() == 1) {
+ updateLowPCHighPC(&Die, LowPCVal, HighPCVal, OutputRanges.back().LowPC,
+ OutputRanges.back().HighPC);
+ return;
+ }
+ updateDWARFObjectAddressRanges(Unit, DIEBldr, Die, RangesSectionOffset);
+}
- break;
+void DWARFRewriter::handleCallSite(DIE &Die, DWARFUnit &Unit,
+ DIEBuilder &DIEBldr,
+ DebugAddrWriter &AddressWriter) {
+ auto patchPC = [&](DIE *Die, DIEValue &AttrVal, StringRef Entry) -> void {
+ std::optional<uint64_t> Address = getAsAddress(Unit, AttrVal);
+ const BinaryFunction *Function =
+ BC.getBinaryFunctionContainingAddress(*Address);
+ uint64_t UpdatedAddress = *Address;
+ if (Function)
+ UpdatedAddress = Function->translateInputToOutputAddress(UpdatedAddress);
+
+ if (AttrVal.getForm() == dwarf::DW_FORM_addrx) {
+ const uint32_t Index =
+ AddressWriter.getIndexFromAddress(UpdatedAddress, Unit);
+ DIEBldr.replaceValue(Die, AttrVal.getAttribute(), AttrVal.getForm(),
+ DIEInteger(Index));
+ } else if (AttrVal.getForm() == dwarf::DW_FORM_addr) {
+ DIEBldr.replaceValue(Die, AttrVal.getAttribute(), AttrVal.getForm(),
+ DIEInteger(UpdatedAddress));
+ } else {
+ errs() << "BOLT-ERROR: unsupported form for " << Entry << "\n";
}
- default: {
- // Handle any tag that can have DW_AT_location attribute.
- DIEValue LocAttrInfo = Die->findAttribute(dwarf::DW_AT_location);
- DIEValue LowPCAttrInf...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/100811
More information about the llvm-commits mailing list