[llvm] Reduce llvm-gsymutil memory usage (PR #140740)
via llvm-commits
llvm-commits at lists.llvm.org
Tue May 20 14:15:56 PDT 2025
https://github.com/peremyach updated https://github.com/llvm/llvm-project/pull/140740
>From b36119f4b844c9f1f8dd979ec0ba5ec91c369c76 Mon Sep 17 00:00:00 2001
From: Arslan Khabutdinov <akhabutdinov at fb.com>
Date: Wed, 14 May 2025 02:56:12 -0700
Subject: [PATCH] Reduce llvm-gsymutil memory usage
---
.../llvm/DebugInfo/DWARF/DWARFContext.h | 7 +
llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h | 6 +-
llvm/lib/DebugInfo/DWARF/DWARFContext.cpp | 10 +
llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp | 214 +++++++++---------
llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp | 19 +-
5 files changed, 145 insertions(+), 111 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
index 85c42b88d0541..3d081851f361b 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
@@ -102,6 +102,9 @@ class DWARFContext : public DIContext {
/// Parse a macro[.dwo] or macinfo[.dwo] section.
std::unique_ptr<DWARFDebugMacro>
parseMacroOrMacinfo(MacroSecType SectionType);
+
+ virtual Error doWorkThreadSafely(function_ref<Error()> Work) = 0;
+
};
friend class DWARFContextState;
@@ -490,6 +493,10 @@ class DWARFContext : public DIContext {
/// manually only for DWARF5.
void setParseCUTUIndexManually(bool PCUTU) { ParseCUTUIndexManually = PCUTU; }
+ Error doWorkThreadSafely(function_ref<Error()> Work) {
+ return State->doWorkThreadSafely(Work);
+ }
+
private:
void addLocalsForDie(DWARFCompileUnit *CU, DWARFDie Subprogram, DWARFDie Die,
std::vector<DILocal> &Result);
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index 80c27aea89312..0f7958f28065d 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -566,6 +566,9 @@ class DWARFUnit {
Error tryExtractDIEsIfNeeded(bool CUDieOnly);
+ /// clearDIEs - Clear parsed DIEs to keep memory usage low.
+ void clearDIEs(bool KeepCUDie, bool KeepDWODies = false);
+
private:
/// Size in bytes of the .debug_info data associated with this compile unit.
size_t getDebugInfoSize() const {
@@ -581,9 +584,6 @@ class DWARFUnit {
void extractDIEsToVector(bool AppendCUDie, bool AppendNonCUDIEs,
std::vector<DWARFDebugInfoEntry> &DIEs) const;
- /// clearDIEs - Clear parsed DIEs to keep memory usage low.
- void clearDIEs(bool KeepCUDie);
-
/// parseDWO - Parses .dwo file for current compile unit. Returns true if
/// it was actually constructed.
/// The \p AlternativeLocation specifies an alternative location to get
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index 27aa99ae94fce..73399f15a015f 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -621,6 +621,10 @@ class ThreadUnsafeDWARFContextState : public DWARFContext::DWARFContextState {
else
return getNormalTypeUnitMap();
}
+
+ Error doWorkThreadSafely(function_ref<Error()> Work) override {
+ return Work();
+ }
};
class ThreadSafeState : public ThreadUnsafeDWARFContextState {
@@ -736,6 +740,12 @@ class ThreadSafeState : public ThreadUnsafeDWARFContextState {
std::unique_lock<std::recursive_mutex> LockGuard(Mutex);
return ThreadUnsafeDWARFContextState::getTypeUnitMap(IsDWO);
}
+
+ Error doWorkThreadSafely(function_ref<Error()> Work) override {
+ std::unique_lock<std::recursive_mutex> LockGuard(Mutex);
+ return ThreadUnsafeDWARFContextState::doWorkThreadSafely(Work);
+ }
+
};
} // namespace
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index d719a47c84072..a397cad0051db 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -496,107 +496,111 @@ void DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
}
Error DWARFUnit::tryExtractDIEsIfNeeded(bool CUDieOnly) {
- if ((CUDieOnly && !DieArray.empty()) || DieArray.size() > 1)
- return Error::success(); // Already parsed.
-
- bool HasCUDie = !DieArray.empty();
- extractDIEsToVector(!HasCUDie, !CUDieOnly, DieArray);
-
- if (DieArray.empty())
- return Error::success();
+ return Context.doWorkThreadSafely([&]() -> Error {
+ if ((CUDieOnly && !DieArray.empty()) || DieArray.size() > 1)
+ return Error::success(); // Already parsed.
+
+ bool HasCUDie = !DieArray.empty();
+ extractDIEsToVector(!HasCUDie, !CUDieOnly, DieArray);
+
+ if (DieArray.empty())
+ return Error::success();
+
+ // If CU DIE was just parsed, copy several attribute values from it.
+ if (HasCUDie)
+ return Error::success();
+
+ DWARFDie UnitDie(this, &DieArray[0]);
+ if (std::optional<uint64_t> DWOId =
+ toUnsigned(UnitDie.find(DW_AT_GNU_dwo_id)))
+ Header.setDWOId(*DWOId);
+ if (!IsDWO) {
+ assert(AddrOffsetSectionBase == std::nullopt);
+ assert(RangeSectionBase == 0);
+ assert(LocSectionBase == 0);
+ AddrOffsetSectionBase = toSectionOffset(UnitDie.find(DW_AT_addr_base));
+ if (!AddrOffsetSectionBase)
+ AddrOffsetSectionBase =
+ toSectionOffset(UnitDie.find(DW_AT_GNU_addr_base));
+ RangeSectionBase = toSectionOffset(UnitDie.find(DW_AT_rnglists_base), 0);
+ LocSectionBase = toSectionOffset(UnitDie.find(DW_AT_loclists_base), 0);
+ }
- // If CU DIE was just parsed, copy several attribute values from it.
- if (HasCUDie)
- return Error::success();
+ // In general, in DWARF v5 and beyond we derive the start of the unit's
+ // contribution to the string offsets table from the unit DIE's
+ // DW_AT_str_offsets_base attribute. Split DWARF units do not use this
+ // attribute, so we assume that there is a contribution to the string
+ // offsets table starting at offset 0 of the debug_str_offsets.dwo section.
+ // In both cases we need to determine the format of the contribution,
+ // which may differ from the unit's format.
+ DWARFDataExtractor DA(Context.getDWARFObj(), StringOffsetSection,
+ IsLittleEndian, 0);
+ if (IsDWO || getVersion() >= 5) {
+ auto StringOffsetOrError =
+ IsDWO ? determineStringOffsetsTableContributionDWO(DA)
+ : determineStringOffsetsTableContribution(DA);
+ if (!StringOffsetOrError) {
+ return createStringError(errc::invalid_argument,
+ "invalid reference to or invalid content in "
+ ".debug_str_offsets[.dwo]: " +
+ toString(StringOffsetOrError.takeError()));
+ }
- DWARFDie UnitDie(this, &DieArray[0]);
- if (std::optional<uint64_t> DWOId =
- toUnsigned(UnitDie.find(DW_AT_GNU_dwo_id)))
- Header.setDWOId(*DWOId);
- if (!IsDWO) {
- assert(AddrOffsetSectionBase == std::nullopt);
- assert(RangeSectionBase == 0);
- assert(LocSectionBase == 0);
- AddrOffsetSectionBase = toSectionOffset(UnitDie.find(DW_AT_addr_base));
- if (!AddrOffsetSectionBase)
- AddrOffsetSectionBase =
- toSectionOffset(UnitDie.find(DW_AT_GNU_addr_base));
- RangeSectionBase = toSectionOffset(UnitDie.find(DW_AT_rnglists_base), 0);
- LocSectionBase = toSectionOffset(UnitDie.find(DW_AT_loclists_base), 0);
- }
+ StringOffsetsTableContribution = *StringOffsetOrError;
+ }
- // In general, in DWARF v5 and beyond we derive the start of the unit's
- // contribution to the string offsets table from the unit DIE's
- // DW_AT_str_offsets_base attribute. Split DWARF units do not use this
- // attribute, so we assume that there is a contribution to the string
- // offsets table starting at offset 0 of the debug_str_offsets.dwo section.
- // In both cases we need to determine the format of the contribution,
- // which may differ from the unit's format.
- DWARFDataExtractor DA(Context.getDWARFObj(), StringOffsetSection,
- IsLittleEndian, 0);
- if (IsDWO || getVersion() >= 5) {
- auto StringOffsetOrError =
- IsDWO ? determineStringOffsetsTableContributionDWO(DA)
- : determineStringOffsetsTableContribution(DA);
- if (!StringOffsetOrError)
- return createStringError(errc::invalid_argument,
- "invalid reference to or invalid content in "
- ".debug_str_offsets[.dwo]: " +
- toString(StringOffsetOrError.takeError()));
-
- StringOffsetsTableContribution = *StringOffsetOrError;
- }
+ // DWARF v5 uses the .debug_rnglists and .debug_rnglists.dwo sections to
+ // describe address ranges.
+ if (getVersion() >= 5) {
+ // In case of DWP, the base offset from the index has to be added.
+ if (IsDWO) {
+ uint64_t ContributionBaseOffset = 0;
+ if (auto *IndexEntry = Header.getIndexEntry())
+ if (auto *Contrib = IndexEntry->getContribution(DW_SECT_RNGLISTS))
+ ContributionBaseOffset = Contrib->getOffset();
+ setRangesSection(
+ &Context.getDWARFObj().getRnglistsDWOSection(),
+ ContributionBaseOffset +
+ DWARFListTableHeader::getHeaderSize(Header.getFormat()));
+ } else
+ setRangesSection(&Context.getDWARFObj().getRnglistsSection(),
+ toSectionOffset(UnitDie.find(DW_AT_rnglists_base),
+ DWARFListTableHeader::getHeaderSize(
+ Header.getFormat())));
+ }
- // DWARF v5 uses the .debug_rnglists and .debug_rnglists.dwo sections to
- // describe address ranges.
- if (getVersion() >= 5) {
- // In case of DWP, the base offset from the index has to be added.
if (IsDWO) {
- uint64_t ContributionBaseOffset = 0;
+ // If we are reading a package file, we need to adjust the location list
+ // data based on the index entries.
+ StringRef Data = Header.getVersion() >= 5
+ ? Context.getDWARFObj().getLoclistsDWOSection().Data
+ : Context.getDWARFObj().getLocDWOSection().Data;
if (auto *IndexEntry = Header.getIndexEntry())
- if (auto *Contrib = IndexEntry->getContribution(DW_SECT_RNGLISTS))
- ContributionBaseOffset = Contrib->getOffset();
- setRangesSection(
- &Context.getDWARFObj().getRnglistsDWOSection(),
- ContributionBaseOffset +
- DWARFListTableHeader::getHeaderSize(Header.getFormat()));
- } else
- setRangesSection(&Context.getDWARFObj().getRnglistsSection(),
- toSectionOffset(UnitDie.find(DW_AT_rnglists_base),
- DWARFListTableHeader::getHeaderSize(
- Header.getFormat())));
- }
+ if (const auto *C = IndexEntry->getContribution(
+ Header.getVersion() >= 5 ? DW_SECT_LOCLISTS : DW_SECT_EXT_LOC))
+ Data = Data.substr(C->getOffset(), C->getLength());
+
+ DWARFDataExtractor DWARFData(Data, IsLittleEndian, getAddressByteSize());
+ LocTable =
+ std::make_unique<DWARFDebugLoclists>(DWARFData, Header.getVersion());
+ LocSectionBase = DWARFListTableHeader::getHeaderSize(Header.getFormat());
+ } else if (getVersion() >= 5) {
+ LocTable = std::make_unique<DWARFDebugLoclists>(
+ DWARFDataExtractor(Context.getDWARFObj(),
+ Context.getDWARFObj().getLoclistsSection(),
+ IsLittleEndian, getAddressByteSize()),
+ getVersion());
+ } else {
+ LocTable = std::make_unique<DWARFDebugLoc>(DWARFDataExtractor(
+ Context.getDWARFObj(), Context.getDWARFObj().getLocSection(),
+ IsLittleEndian, getAddressByteSize()));
+ }
- if (IsDWO) {
- // If we are reading a package file, we need to adjust the location list
- // data based on the index entries.
- StringRef Data = Header.getVersion() >= 5
- ? Context.getDWARFObj().getLoclistsDWOSection().Data
- : Context.getDWARFObj().getLocDWOSection().Data;
- if (auto *IndexEntry = Header.getIndexEntry())
- if (const auto *C = IndexEntry->getContribution(
- Header.getVersion() >= 5 ? DW_SECT_LOCLISTS : DW_SECT_EXT_LOC))
- Data = Data.substr(C->getOffset(), C->getLength());
-
- DWARFDataExtractor DWARFData(Data, IsLittleEndian, getAddressByteSize());
- LocTable =
- std::make_unique<DWARFDebugLoclists>(DWARFData, Header.getVersion());
- LocSectionBase = DWARFListTableHeader::getHeaderSize(Header.getFormat());
- } else if (getVersion() >= 5) {
- LocTable = std::make_unique<DWARFDebugLoclists>(
- DWARFDataExtractor(Context.getDWARFObj(),
- Context.getDWARFObj().getLoclistsSection(),
- IsLittleEndian, getAddressByteSize()),
- getVersion());
- } else {
- LocTable = std::make_unique<DWARFDebugLoc>(DWARFDataExtractor(
- Context.getDWARFObj(), Context.getDWARFObj().getLocSection(),
- IsLittleEndian, getAddressByteSize()));
- }
+ // Don't fall back to DW_AT_GNU_ranges_base: it should be ignored for
+ // skeleton CU DIE, so that DWARF users not aware of it are not broken.
- // Don't fall back to DW_AT_GNU_ranges_base: it should be ignored for
- // skeleton CU DIE, so that DWARF users not aware of it are not broken.
- return Error::success();
+ return Error::success();
+ });
}
bool DWARFUnit::parseDWO(StringRef DWOAlternativeLocation) {
@@ -651,15 +655,21 @@ bool DWARFUnit::parseDWO(StringRef DWOAlternativeLocation) {
return true;
}
-void DWARFUnit::clearDIEs(bool KeepCUDie) {
- // Do not use resize() + shrink_to_fit() to free memory occupied by dies.
- // shrink_to_fit() is a *non-binding* request to reduce capacity() to size().
- // It depends on the implementation whether the request is fulfilled.
- // Create a new vector with a small capacity and assign it to the DieArray to
- // have previous contents freed.
- DieArray = (KeepCUDie && !DieArray.empty())
- ? std::vector<DWARFDebugInfoEntry>({DieArray[0]})
- : std::vector<DWARFDebugInfoEntry>();
+void DWARFUnit::clearDIEs(bool KeepCUDie, bool KeepDWODies) {
+ cantFail(Context.doWorkThreadSafely([&] {
+ if (!KeepDWODies && DWO) {
+ DWO->clearDIEs(KeepCUDie, KeepDWODies);
+ }
+ // Do not use resize() + shrink_to_fit() to free memory occupied by dies.
+ // shrink_to_fit() is a *non-binding* request to reduce capacity() to
+ // size(). It depends on the implementation whether the request is
+ // fulfilled. Create a new vector with a small capacity and assign it to the
+ // DieArray to have previous contents freed.
+ DieArray = (KeepCUDie && !DieArray.empty())
+ ? std::vector<DWARFDebugInfoEntry>({DieArray[0]})
+ : std::vector<DWARFDebugInfoEntry>();
+ return Error::success();
+ }));
}
Expected<DWARFAddressRangesVector>
diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
index 7a0256f10ea60..1f70d273a9d9d 100644
--- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
+++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
@@ -656,6 +656,11 @@ Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) {
DWARFDie Die = getDie(*CU);
CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
handleDie(Out, CUI, Die);
+ // Release the line table, once we're done.
+ DICtx.clearLineTableForUnit(CU.get());
+ // Free any DIEs that were allocated by the DWARF parser.
+ // If/when they're needed by other CUs, they'll be recreated.
+ CU->clearDIEs(/*KeepCUDie=*/false, /*KeepDWODies=*/false);
}
} else {
// LLVM Dwarf parser is not thread-safe and we need to parse all DWARF up
@@ -668,12 +673,7 @@ Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) {
for (const auto &CU : DICtx.compile_units())
CU->getAbbreviations();
- // Now parse all DIEs in case we have cross compile unit references in a
- // thread pool.
DefaultThreadPool pool(hardware_concurrency(NumThreads));
- for (const auto &CU : DICtx.compile_units())
- pool.async([&CU]() { CU->getUnitDIE(false /*CUDieOnly*/); });
- pool.wait();
// Now convert all DWARF to GSYM in a thread pool.
std::mutex LogMutex;
@@ -681,11 +681,15 @@ Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) {
DWARFDie Die = getDie(*CU);
if (Die) {
CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
- pool.async([this, CUI, &LogMutex, &Out, Die]() mutable {
+ pool.async([this, CUI, &CU, &LogMutex, &Out, Die]() mutable {
std::string storage;
raw_string_ostream StrStream(storage);
OutputAggregator ThreadOut(Out.GetOS() ? &StrStream : nullptr);
handleDie(ThreadOut, CUI, Die);
+ DICtx.clearLineTableForUnit(CU.get());
+ // Free any DIEs that were allocated by the DWARF parser.
+ // If/when they're needed by other CUs, they'll be recreated.
+ CU->clearDIEs(/*KeepCUDie=*/false, /*KeepDWODies=*/false);
// Print ThreadLogStorage lines into an actual stream under a lock
std::lock_guard<std::mutex> guard(LogMutex);
if (Out.GetOS()) {
@@ -697,6 +701,9 @@ Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) {
}
pool.wait();
}
+ // Now free any DIEs that were recreated on behalf of other CUs.
+ for (const auto &CU : DICtx.compile_units())
+ CU->clearDIEs(/*KeepCUDie=*/false, /*KeepDWODies=*/false);
size_t FunctionsAddedCount = Gsym.getNumFunctionInfos() - NumBefore;
Out << "Loaded " << FunctionsAddedCount << " functions from DWARF.\n";
return Error::success();
More information about the llvm-commits
mailing list