[llvm] ac6e48d - Modify llvm-dwp to be able to emit string tables over 4GB without losing data (#167457)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 18 11:32:27 PST 2025
Author: Greg Clayton
Date: 2025-11-18T11:32:23-08:00
New Revision: ac6e48de40ec8be78d407072479cdbf7aa35535d
URL: https://github.com/llvm/llvm-project/commit/ac6e48de40ec8be78d407072479cdbf7aa35535d
DIFF: https://github.com/llvm/llvm-project/commit/ac6e48de40ec8be78d407072479cdbf7aa35535d.diff
LOG: Modify llvm-dwp to be able to emit string tables over 4GB without losing data (#167457)
We can change llvm-dwp to emit a DWARF64 version of the .debug_str_offsets
tables for .dwo files in a .dwp file. This allows the string table to
exceed 4GB without truncating string offsets into the .debug_str section
and losing data. llvm-dwp will append all strings to the .debug_str
section for a .dwo file, and if any of the new string offsets exceed
UINT32_MAX, it will upgrade the .debug_str_offsets table to a DWARF64
header so that each string offset in that table can be stored as a
64-bit value.
Fixed LLDB to be able to successfully load the 64 bit string tables in
.dwp files.
Fixed llvm-dwarfdump and LLVM DWARF parsing code to do the right thing
with DWARF64 string table headers.
Added:
llvm/test/tools/llvm-dwp/X86/dwarf64-str-offsets.test
Modified:
llvm/include/llvm/DWP/DWP.h
llvm/include/llvm/DWP/DWPStringPool.h
llvm/lib/DWP/DWP.cpp
llvm/tools/llvm-dwp/Opts.td
llvm/tools/llvm-dwp/llvm-dwp.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/DWP/DWP.h b/llvm/include/llvm/DWP/DWP.h
index a759bae10d160..10fdae25d4eef 100644
--- a/llvm/include/llvm/DWP/DWP.h
+++ b/llvm/include/llvm/DWP/DWP.h
@@ -22,6 +22,12 @@ enum OnCuIndexOverflow {
Continue,
};
+enum Dwarf64StrOffsetsPromotion {
+ Disabled, ///< Don't do any conversion of .debug_str_offsets tables.
+ Enabled, ///< Convert any .debug_str_offsets tables to DWARF64 if needed.
+ Always, ///< Always emit .debug_str_offsets tables as DWARF64 for testing.
+};
+
struct UnitIndexEntry {
DWARFUnitIndex::Entry::SectionContribution Contributions[8];
std::string Name;
@@ -68,7 +74,10 @@ struct CompileUnitIdentifiers {
};
LLVM_ABI Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
- OnCuIndexOverflow OverflowOptValue);
+ OnCuIndexOverflow OverflowOptValue,
+ Dwarf64StrOffsetsPromotion StrOffsetsOptValue);
+
+typedef std::vector<std::pair<DWARFSectionKind, uint32_t>> SectionLengths;
LLVM_ABI Error handleSection(
const StringMap<std::pair<MCSection *, DWARFSectionKind>> &KnownSections,
@@ -82,7 +91,7 @@ LLVM_ABI Error handleSection(
std::vector<StringRef> &CurTypesSection,
std::vector<StringRef> &CurInfoSection, StringRef &AbbrevSection,
StringRef &CurCUIndexSection, StringRef &CurTUIndexSection,
- std::vector<std::pair<DWARFSectionKind, uint32_t>> &SectionLength);
+ SectionLengths &SectionLength);
LLVM_ABI Expected<InfoSectionUnitHeader>
parseInfoSectionUnitHeader(StringRef Info);
diff --git a/llvm/include/llvm/DWP/DWPStringPool.h b/llvm/include/llvm/DWP/DWPStringPool.h
index 1354b46f156b6..d1486ff7872e1 100644
--- a/llvm/include/llvm/DWP/DWPStringPool.h
+++ b/llvm/include/llvm/DWP/DWPStringPool.h
@@ -32,13 +32,13 @@ class DWPStringPool {
MCStreamer &Out;
MCSection *Sec;
- DenseMap<const char *, uint32_t, CStrDenseMapInfo> Pool;
- uint32_t Offset = 0;
+ DenseMap<const char *, uint64_t, CStrDenseMapInfo> Pool;
+ uint64_t Offset = 0;
public:
DWPStringPool(MCStreamer &Out, MCSection *Sec) : Out(Out), Sec(Sec) {}
- uint32_t getOffset(const char *Str, unsigned Length) {
+ uint64_t getOffset(const char *Str, unsigned Length) {
assert(strlen(Str) + 1 == Length && "Ensure length hint is correct");
auto Pair = Pool.insert(std::make_pair(Str, Offset));
diff --git a/llvm/lib/DWP/DWP.cpp b/llvm/lib/DWP/DWP.cpp
index b565edbfe96db..a563a90a1fb4d 100644
--- a/llvm/lib/DWP/DWP.cpp
+++ b/llvm/lib/DWP/DWP.cpp
@@ -413,33 +413,52 @@ Expected<InfoSectionUnitHeader> parseInfoSectionUnitHeader(StringRef Info) {
}
static void writeNewOffsetsTo(MCStreamer &Out, DataExtractor &Data,
- DenseMap<uint64_t, uint32_t> &OffsetRemapping,
- uint64_t &Offset, uint64_t &Size) {
-
+ DenseMap<uint64_t, uint64_t> &OffsetRemapping,
+ uint64_t &Offset, const uint64_t Size,
+ uint32_t OldOffsetSize, uint32_t NewOffsetSize) {
+ // Create a mask so we don't trigger an emitIntValue() assert below if the
+ // NewOffset is over 4GB.
+ const uint64_t NewOffsetMask = NewOffsetSize == 8 ? UINT64_MAX : UINT32_MAX;
while (Offset < Size) {
- auto OldOffset = Data.getU32(&Offset);
- auto NewOffset = OffsetRemapping[OldOffset];
- Out.emitIntValue(NewOffset, 4);
+ const uint64_t OldOffset = Data.getUnsigned(&Offset, OldOffsetSize);
+ const uint64_t NewOffset = OffsetRemapping[OldOffset];
+ // Truncate the string offset like the old llvm-dwp would have if we aren't
+ // promoting the .debug_str_offsets to DWARF64.
+ Out.emitIntValue(NewOffset & NewOffsetMask, NewOffsetSize);
}
}
-void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings,
- MCSection *StrOffsetSection,
- StringRef CurStrSection,
- StringRef CurStrOffsetSection, uint16_t Version) {
+void writeStringsAndOffsets(
+ MCStreamer &Out, DWPStringPool &Strings, MCSection *StrOffsetSection,
+ StringRef CurStrSection, StringRef CurStrOffsetSection, uint16_t Version,
+ SectionLengths &SectionLength,
+ const Dwarf64StrOffsetsPromotion StrOffsetsOptValue) {
// Could possibly produce an error or warning if one of these was non-null but
// the other was null.
if (CurStrSection.empty() || CurStrOffsetSection.empty())
return;
- DenseMap<uint64_t, uint32_t> OffsetRemapping;
+ DenseMap<uint64_t, uint64_t> OffsetRemapping;
DataExtractor Data(CurStrSection, true, 0);
uint64_t LocalOffset = 0;
uint64_t PrevOffset = 0;
+
+ // Keep track if any new string offsets exceed UINT32_MAX. If any do, we can
+ // emit a DWARF64 .debug_str_offsets table for this compile unit. If the
+ // \a StrOffsetsOptValue argument is Dwarf64StrOffsetsPromotion::Always, then
+ // force the emission of DWARF64 .debug_str_offsets for testing.
+ uint32_t OldOffsetSize = 4;
+ uint32_t NewOffsetSize =
+ StrOffsetsOptValue == Dwarf64StrOffsetsPromotion::Always ? 8 : 4;
while (const char *S = Data.getCStr(&LocalOffset)) {
- OffsetRemapping[PrevOffset] =
- Strings.getOffset(S, LocalOffset - PrevOffset);
+ uint64_t NewOffset = Strings.getOffset(S, LocalOffset - PrevOffset);
+ OffsetRemapping[PrevOffset] = NewOffset;
+ // Only promote the .debug_str_offsets to DWARF64 if our setting allows it.
+ if (StrOffsetsOptValue != Dwarf64StrOffsetsPromotion::Disabled &&
+ NewOffset > UINT32_MAX) {
+ NewOffsetSize = 8;
+ }
PrevOffset = LocalOffset;
}
@@ -451,7 +470,7 @@ void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings,
uint64_t Size = CurStrOffsetSection.size();
if (Version > 4) {
while (Offset < Size) {
- uint64_t HeaderSize = debugStrOffsetsHeaderSize(Data, Version);
+ const uint64_t HeaderSize = debugStrOffsetsHeaderSize(Data, Version);
assert(HeaderSize <= Size - Offset &&
"StrOffsetSection size is less than its header");
@@ -461,16 +480,52 @@ void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings,
if (HeaderSize == 8) {
ContributionSize = Data.getU32(&HeaderLengthOffset);
} else if (HeaderSize == 16) {
+ OldOffsetSize = 8;
HeaderLengthOffset += 4; // skip the dwarf64 marker
ContributionSize = Data.getU64(&HeaderLengthOffset);
}
ContributionEnd = ContributionSize + HeaderLengthOffset;
- Out.emitBytes(Data.getBytes(&Offset, HeaderSize));
- writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, ContributionEnd);
+
+ StringRef HeaderBytes = Data.getBytes(&Offset, HeaderSize);
+ if (OldOffsetSize == 4 && NewOffsetSize == 8) {
+ // We had a DWARF32 .debug_str_offsets header, but we need to emit
+ // some string offsets that require 64 bit offsets on the .debug_str
+ // section. Emit the .debug_str_offsets header in DWARF64 format so we
+ // can emit string offsets that exceed UINT32_MAX without truncating
+ // the string offset.
+
+ // 2 bytes for DWARF version, 2 bytes pad.
+ const uint64_t VersionPadSize = 4;
+ const uint64_t NewLength =
+ (ContributionSize - VersionPadSize) * 2 + VersionPadSize;
+ // Emit the DWARF64 length that starts with a 4 byte DW_LENGTH_DWARF64
+ // value followed by the 8 byte updated length.
+ Out.emitIntValue(llvm::dwarf::DW_LENGTH_DWARF64, 4);
+ Out.emitIntValue(NewLength, 8);
+ // Emit DWARF version as a 2 byte integer.
+ Out.emitIntValue(Version, 2);
+ // Emit 2 bytes of padding.
+ Out.emitIntValue(0, 2);
+ // Update the .debug_str_offsets section length contribution for
+ // this .dwo file.
+ for (auto &Pair : SectionLength) {
+ if (Pair.first == DW_SECT_STR_OFFSETS) {
+ Pair.second = NewLength + 12;
+ break;
+ }
+ }
+ } else {
+ // Just emit the same .debug_str_offsets header.
+ Out.emitBytes(HeaderBytes);
+ }
+ writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, ContributionEnd,
+ OldOffsetSize, NewOffsetSize);
}
} else {
- writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, Size);
+ assert(OldOffsetSize == NewOffsetSize);
+ writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, Size, OldOffsetSize,
+ NewOffsetSize);
}
}
@@ -562,7 +617,7 @@ Error handleSection(
std::vector<StringRef> &CurTypesSection,
std::vector<StringRef> &CurInfoSection, StringRef &AbbrevSection,
StringRef &CurCUIndexSection, StringRef &CurTUIndexSection,
- std::vector<std::pair<DWARFSectionKind, uint32_t>> &SectionLength) {
+ SectionLengths &SectionLength) {
if (Section.isBSS())
return Error::success();
@@ -620,7 +675,8 @@ Error handleSection(
}
Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
- OnCuIndexOverflow OverflowOptValue) {
+ OnCuIndexOverflow OverflowOptValue,
+ Dwarf64StrOffsetsPromotion StrOffsetsOptValue) {
const auto &MCOFI = *Out.getContext().getObjectFileInfo();
MCSection *const StrSection = MCOFI.getDwarfStrDWOSection();
MCSection *const StrOffsetSection = MCOFI.getDwarfStrOffDWOSection();
@@ -684,7 +740,7 @@ Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
// This maps each section contained in this file to its length.
// This information is later on used to calculate the contributions,
// i.e. offset and length, of each compile/type unit to a section.
- std::vector<std::pair<DWARFSectionKind, uint32_t>> SectionLength;
+ SectionLengths SectionLength;
for (const auto &Section : Obj.sections())
if (auto Err = handleSection(
@@ -713,7 +769,8 @@ Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
}
writeStringsAndOffsets(Out, Strings, StrOffsetSection, CurStrSection,
- CurStrOffsetSection, Header.Version);
+ CurStrOffsetSection, Header.Version, SectionLength,
+ StrOffsetsOptValue);
for (auto Pair : SectionLength) {
auto Index = getContributionIndex(Pair.first, IndexVersion);
diff --git a/llvm/test/tools/llvm-dwp/X86/dwarf64-str-offsets.test b/llvm/test/tools/llvm-dwp/X86/dwarf64-str-offsets.test
new file mode 100644
index 0000000000000..26f7acae70aeb
--- /dev/null
+++ b/llvm/test/tools/llvm-dwp/X86/dwarf64-str-offsets.test
@@ -0,0 +1,81 @@
+# This test checks that llvm-dwp can successfully promote .debug_str_offsets to
+# DWARF64. We do this by passing "--dwarf64-str-offsets-promotion=always" to
+# llvm-dwp, which forces every .debug_str_offsets table to be emitted as
+# DWARF64, so a DWARF32 .debug_str_offsets table in the input is promoted to
+# a DWARF64 one. This allows us to test the functionality without having to
+# create a 4GB .dwo file.
+
+# RUN: yaml2obj %s -o %t.dwo
+# RUN: llvm-dwp %t.dwo -o %t.dwp
+# RUN: llvm-dwp %t.dwo -o %t.default.dwp --dwarf64-str-offsets-promotion
+# RUN: llvm-dwp %t.dwo -o %t.disabled.dwp --dwarf64-str-offsets-promotion=disabled
+# RUN: llvm-dwp %t.dwo -o %t.enabled.dwp --dwarf64-str-offsets-promotion=enabled
+# RUN: llvm-dwp %t.dwo -o %t.always.dwp --dwarf64-str-offsets-promotion=always
+# RUN: not llvm-dwp %t.dwo -o %t.invalid.dwp --dwarf64-str-offsets-promotion=invalid 2>&1 | FileCheck --check-prefixes=ERROR %s
+# RUN: llvm-dwarfdump --debug-str-offsets %t.dwp | FileCheck --check-prefixes=DWARF32 %s
+# RUN: llvm-dwarfdump --debug-str-offsets %t.default.dwp | FileCheck --check-prefixes=DWARF32 %s
+# RUN: llvm-dwarfdump --debug-str-offsets %t.disabled.dwp | FileCheck --check-prefixes=DWARF32 %s
+# RUN: llvm-dwarfdump --debug-str-offsets %t.enabled.dwp | FileCheck --check-prefixes=DWARF32 %s
+# RUN: llvm-dwarfdump --debug-str-offsets %t.always.dwp | FileCheck --check-prefixes=DWARF64 %s
+
+# DWARF32: .debug_str_offsets.dwo contents:
+# DWARF32-NEXT: 0x00000000: Contribution size = 36, Format = DWARF32, Version = 5
+# DWARF32-NEXT: 0x00000008: 00000000 "main"
+# DWARF32-NEXT: 0x0000000c: 00000005 "int"
+# DWARF32-NEXT: 0x00000010: 00000009 "argc"
+# DWARF32-NEXT: 0x00000014: 0000000e "argv"
+# DWARF32-NEXT: 0x00000018: 00000013 "char"
+# DWARF32-NEXT: 0x0000001c: 00000018 "Apple clang version 17.0.0 (clang-1700.4.4.1)"
+# DWARF32-NEXT: 0x00000020: 00000046 "simple.cpp"
+# DWARF32-NEXT: 0x00000024: 00000051 "simple.dwo"
+
+# DWARF64: .debug_str_offsets.dwo contents:
+# DWARF64-NEXT: 0x00000000: Contribution size = 68, Format = DWARF64, Version = 5
+# DWARF64-NEXT: 0x00000010: 0000000000000000 "main"
+# DWARF64-NEXT: 0x00000018: 0000000000000005 "int"
+# DWARF64-NEXT: 0x00000020: 0000000000000009 "argc"
+# DWARF64-NEXT: 0x00000028: 000000000000000e "argv"
+# DWARF64-NEXT: 0x00000030: 0000000000000013 "char"
+# DWARF64-NEXT: 0x00000038: 0000000000000018 "Apple clang version 17.0.0 (clang-1700.4.4.1)"
+# DWARF64-NEXT: 0x00000040: 0000000000000046 "simple.cpp"
+# DWARF64-NEXT: 0x00000048: 0000000000000051 "simple.dwo"
+
+# ERROR: invalid value for --dwarf64-str-offsets-promotion. Valid values are one of: "enabled", "disabled" or "always".
+
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_REL
+ Machine: EM_X86_64
+ SectionHeaderStringTable: .strtab
+Sections:
+ - Name: .debug_str_offsets.dwo
+ Type: SHT_PROGBITS
+ Flags: [ SHF_EXCLUDE ]
+ AddressAlign: 0x1
+ Content: '24000000050000000000000005000000090000000E00000013000000180000004600000051000000'
+ - Name: .debug_str.dwo
+ Type: SHT_PROGBITS
+ Flags: [ SHF_EXCLUDE, SHF_MERGE, SHF_STRINGS ]
+ AddressAlign: 0x1
+ EntSize: 0x1
+ Content: 6D61696E00696E74006172676300617267760063686172004170706C6520636C616E672076657273696F6E2031372E302E302028636C616E672D313730302E342E342E31290073696D706C652E6370700073696D706C652E64776F00
+ - Name: .debug_info.dwo
+ Type: SHT_PROGBITS
+ Flags: [ SHF_EXCLUDE ]
+ AddressAlign: 0x1
+ Content: 540000000500050800000000031DD228762F8E1C0105210006070200190000000156000001400000000302917802000140000000030291700300014400000000040105040549000000054E00000006530000000404060100
+ - Name: .debug_abbrev.dwo
+ Type: SHT_PROGBITS
+ Flags: [ SHF_EXCLUDE ]
+ AddressAlign: 0x1
+ Content: 01110125251305032576250000022E01111B1206401803253A0B3B0B49133F190000030500021803253A0B3B0B4913000004240003253E0B0B0B0000050F00491300000626004913000000
+ - Type: SectionHeaderTable
+ Sections:
+ - Name: .strtab
+ - Name: .debug_str_offsets.dwo
+ - Name: .debug_str.dwo
+ - Name: .debug_info.dwo
+ - Name: .debug_abbrev.dwo
+...
diff --git a/llvm/tools/llvm-dwp/Opts.td b/llvm/tools/llvm-dwp/Opts.td
index 46593bc40ebae..d4474ac073fd7 100644
--- a/llvm/tools/llvm-dwp/Opts.td
+++ b/llvm/tools/llvm-dwp/Opts.td
@@ -16,3 +16,18 @@ def continueOnCuIndexOverflow_EQ : Joined<["-", "--"], "continue-on-cu-index-ove
"\t\ttruncated but valid DWP file, discarding any DWO files that would not fit within \n"
"\t\tthe 32 bit/4GB limits of the format.">,
Values<"continue,soft-stop">;
+
+def dwarf64StringOffsets : Flag<["-", "--"], "dwarf64-str-offsets-promotion">;
+def dwarf64StringOffsets_EQ
+ : Joined<["-", "--"], "dwarf64-str-offsets-promotion=">,
+ HelpText<"default = enabled, This allows .debug_str tables to exceed the "
+ "4GB limit\n"
+ "and have any DWARF32 .debug_str_offsets tables converted to "
+ "DWARF64 only for tables\n"
+ "that require 64 bit string offsets. = disabled, This setting "
+ "doesn't convert DWARF32\n"
+ ".debug_str_offsets tables in .dwo files to DWARF64 in the .dwp "
+ "file. = always, This\n"
+ "forces all .debug_str_offsets tables to be emitted as DWARF64. "
+ "This is used for testing.">,
+ Values<"disabled,enabled,always">;
diff --git a/llvm/tools/llvm-dwp/llvm-dwp.cpp b/llvm/tools/llvm-dwp/llvm-dwp.cpp
index 31bad2d68982b..2892450398bb6 100644
--- a/llvm/tools/llvm-dwp/llvm-dwp.cpp
+++ b/llvm/tools/llvm-dwp/llvm-dwp.cpp
@@ -125,6 +125,9 @@ int llvm_dwp_main(int argc, char **argv, const llvm::ToolContext &) {
llvm::BumpPtrAllocator A;
llvm::StringSaver Saver{A};
OnCuIndexOverflow OverflowOptValue = OnCuIndexOverflow::HardStop;
+ Dwarf64StrOffsetsPromotion Dwarf64StrOffsetsValue =
+ Dwarf64StrOffsetsPromotion::Disabled;
+
opt::InputArgList Args =
Tbl.parseArgs(argc, argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) {
llvm::errs() << Msg << '\n';
@@ -161,6 +164,27 @@ int llvm_dwp_main(int argc, char **argv, const llvm::ToolContext &) {
}
}
+ if (Arg *Arg = Args.getLastArg(OPT_dwarf64StringOffsets,
+ OPT_dwarf64StringOffsets_EQ)) {
+ if (Arg->getOption().matches(OPT_dwarf64StringOffsets)) {
+ Dwarf64StrOffsetsValue = Dwarf64StrOffsetsPromotion::Enabled;
+ } else {
+ std::string OptValue = Arg->getValue();
+ if (OptValue == "disabled") {
+ Dwarf64StrOffsetsValue = Dwarf64StrOffsetsPromotion::Disabled;
+ } else if (OptValue == "enabled") {
+ Dwarf64StrOffsetsValue = Dwarf64StrOffsetsPromotion::Enabled;
+ } else if (OptValue == "always") {
+ Dwarf64StrOffsetsValue = Dwarf64StrOffsetsPromotion::Always;
+ } else {
+ llvm::errs()
+ << "invalid value for --dwarf64-str-offsets-promotion. Valid "
+ "values are one of: \"enabled\", \"disabled\" or \"always\".\n";
+ exit(1);
+ }
+ }
+ }
+
for (const llvm::opt::Arg *A : Args.filtered(OPT_execFileNames))
ExecFilenames.emplace_back(A->getValue());
@@ -274,7 +298,8 @@ int llvm_dwp_main(int argc, char **argv, const llvm::ToolContext &) {
if (!MS)
return error("no object streamer for target " + TripleName, Context);
- if (auto Err = write(*MS, DWOFilenames, OverflowOptValue)) {
+ if (auto Err =
+ write(*MS, DWOFilenames, OverflowOptValue, Dwarf64StrOffsetsValue)) {
logAllUnhandledErrors(std::move(Err), WithColor::error());
return 1;
}
More information about the llvm-commits
mailing list