[llvm] ac6e48d - Modify llvm-dwp to be able to emit string tables over 4GB without losing data (#167457)

via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 18 11:32:27 PST 2025


Author: Greg Clayton
Date: 2025-11-18T11:32:23-08:00
New Revision: ac6e48de40ec8be78d407072479cdbf7aa35535d

URL: https://github.com/llvm/llvm-project/commit/ac6e48de40ec8be78d407072479cdbf7aa35535d
DIFF: https://github.com/llvm/llvm-project/commit/ac6e48de40ec8be78d407072479cdbf7aa35535d.diff

LOG: Modify llvm-dwp to be able to emit string tables over 4GB without losing data (#167457)

We can change llvm-dwp to emit a DWARF64 version of the .debug_str_offsets
tables for .dwo files in a .dwp file. This allows the string table to
exceed 4GB without truncating string offsets into the .debug_str section
and losing data. llvm-dwp will append all strings to the .debug_str
section for a .dwo file, and if any of the new string offsets exceed
UINT32_MAX, it will upgrade the .debug_str_offsets table to a DWARF64
header and then each string offset in that table can now have a 64 bit
offset.

Fixed LLDB to be able to successfully load the 64 bit string tables in
.dwp files.

Fixed llvm-dwarfdump and LLVM DWARF parsing code to do the right thing
with DWARF64 string table headers.

Added: 
    llvm/test/tools/llvm-dwp/X86/dwarf64-str-offsets.test

Modified: 
    llvm/include/llvm/DWP/DWP.h
    llvm/include/llvm/DWP/DWPStringPool.h
    llvm/lib/DWP/DWP.cpp
    llvm/tools/llvm-dwp/Opts.td
    llvm/tools/llvm-dwp/llvm-dwp.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/DWP/DWP.h b/llvm/include/llvm/DWP/DWP.h
index a759bae10d160..10fdae25d4eef 100644
--- a/llvm/include/llvm/DWP/DWP.h
+++ b/llvm/include/llvm/DWP/DWP.h
@@ -22,6 +22,12 @@ enum OnCuIndexOverflow {
   Continue,
 };
 
+enum Dwarf64StrOffsetsPromotion {
+  Disabled, ///< Don't do any conversion of .debug_str_offsets tables.
+  Enabled,  ///< Convert any .debug_str_offsets tables to DWARF64 if needed.
+  Always,   ///< Always emit .debug_str_offsets tables as DWARF64 for testing.
+};
+
 struct UnitIndexEntry {
   DWARFUnitIndex::Entry::SectionContribution Contributions[8];
   std::string Name;
@@ -68,7 +74,10 @@ struct CompileUnitIdentifiers {
 };
 
 LLVM_ABI Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
-                     OnCuIndexOverflow OverflowOptValue);
+                     OnCuIndexOverflow OverflowOptValue,
+                     Dwarf64StrOffsetsPromotion StrOffsetsOptValue);
+
+typedef std::vector<std::pair<DWARFSectionKind, uint32_t>> SectionLengths;
 
 LLVM_ABI Error handleSection(
     const StringMap<std::pair<MCSection *, DWARFSectionKind>> &KnownSections,
@@ -82,7 +91,7 @@ LLVM_ABI Error handleSection(
     std::vector<StringRef> &CurTypesSection,
     std::vector<StringRef> &CurInfoSection, StringRef &AbbrevSection,
     StringRef &CurCUIndexSection, StringRef &CurTUIndexSection,
-    std::vector<std::pair<DWARFSectionKind, uint32_t>> &SectionLength);
+    SectionLengths &SectionLength);
 
 LLVM_ABI Expected<InfoSectionUnitHeader>
 parseInfoSectionUnitHeader(StringRef Info);

diff  --git a/llvm/include/llvm/DWP/DWPStringPool.h b/llvm/include/llvm/DWP/DWPStringPool.h
index 1354b46f156b6..d1486ff7872e1 100644
--- a/llvm/include/llvm/DWP/DWPStringPool.h
+++ b/llvm/include/llvm/DWP/DWPStringPool.h
@@ -32,13 +32,13 @@ class DWPStringPool {
 
   MCStreamer &Out;
   MCSection *Sec;
-  DenseMap<const char *, uint32_t, CStrDenseMapInfo> Pool;
-  uint32_t Offset = 0;
+  DenseMap<const char *, uint64_t, CStrDenseMapInfo> Pool;
+  uint64_t Offset = 0;
 
 public:
   DWPStringPool(MCStreamer &Out, MCSection *Sec) : Out(Out), Sec(Sec) {}
 
-  uint32_t getOffset(const char *Str, unsigned Length) {
+  uint64_t getOffset(const char *Str, unsigned Length) {
     assert(strlen(Str) + 1 == Length && "Ensure length hint is correct");
 
     auto Pair = Pool.insert(std::make_pair(Str, Offset));

diff  --git a/llvm/lib/DWP/DWP.cpp b/llvm/lib/DWP/DWP.cpp
index b565edbfe96db..a563a90a1fb4d 100644
--- a/llvm/lib/DWP/DWP.cpp
+++ b/llvm/lib/DWP/DWP.cpp
@@ -413,33 +413,52 @@ Expected<InfoSectionUnitHeader> parseInfoSectionUnitHeader(StringRef Info) {
 }
 
 static void writeNewOffsetsTo(MCStreamer &Out, DataExtractor &Data,
-                              DenseMap<uint64_t, uint32_t> &OffsetRemapping,
-                              uint64_t &Offset, uint64_t &Size) {
-
+                              DenseMap<uint64_t, uint64_t> &OffsetRemapping,
+                              uint64_t &Offset, const uint64_t Size,
+                              uint32_t OldOffsetSize, uint32_t NewOffsetSize) {
+  // Create a mask so we don't trigger an emitIntValue() assert below if the
+  // NewOffset is over 4GB.
+  const uint64_t NewOffsetMask = NewOffsetSize == 8 ? UINT64_MAX : UINT32_MAX;
   while (Offset < Size) {
-    auto OldOffset = Data.getU32(&Offset);
-    auto NewOffset = OffsetRemapping[OldOffset];
-    Out.emitIntValue(NewOffset, 4);
+    const uint64_t OldOffset = Data.getUnsigned(&Offset, OldOffsetSize);
+    const uint64_t NewOffset = OffsetRemapping[OldOffset];
+    // Truncate the string offset like the old llvm-dwp would have if we aren't
+    // promoting the .debug_str_offsets to DWARF64.
+    Out.emitIntValue(NewOffset & NewOffsetMask, NewOffsetSize);
   }
 }
 
-void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings,
-                            MCSection *StrOffsetSection,
-                            StringRef CurStrSection,
-                            StringRef CurStrOffsetSection, uint16_t Version) {
+void writeStringsAndOffsets(
+    MCStreamer &Out, DWPStringPool &Strings, MCSection *StrOffsetSection,
+    StringRef CurStrSection, StringRef CurStrOffsetSection, uint16_t Version,
+    SectionLengths &SectionLength,
+    const Dwarf64StrOffsetsPromotion StrOffsetsOptValue) {
   // Could possibly produce an error or warning if one of these was non-null but
   // the other was null.
   if (CurStrSection.empty() || CurStrOffsetSection.empty())
     return;
 
-  DenseMap<uint64_t, uint32_t> OffsetRemapping;
+  DenseMap<uint64_t, uint64_t> OffsetRemapping;
 
   DataExtractor Data(CurStrSection, true, 0);
   uint64_t LocalOffset = 0;
   uint64_t PrevOffset = 0;
+
+  // Keep track if any new string offsets exceed UINT32_MAX. If any do, we can
+  // emit a DWARF64 .debug_str_offsets table for this compile unit. If the
+  // \a StrOffsetsOptValue argument is Dwarf64StrOffsetsPromotion::Always, then
+  // force the emission of DWARF64 .debug_str_offsets for testing.
+  uint32_t OldOffsetSize = 4;
+  uint32_t NewOffsetSize =
+      StrOffsetsOptValue == Dwarf64StrOffsetsPromotion::Always ? 8 : 4;
   while (const char *S = Data.getCStr(&LocalOffset)) {
-    OffsetRemapping[PrevOffset] =
-        Strings.getOffset(S, LocalOffset - PrevOffset);
+    uint64_t NewOffset = Strings.getOffset(S, LocalOffset - PrevOffset);
+    OffsetRemapping[PrevOffset] = NewOffset;
+    // Only promote the .debug_str_offsets to DWARF64 if our setting allows it.
+    if (StrOffsetsOptValue != Dwarf64StrOffsetsPromotion::Disabled &&
+        NewOffset > UINT32_MAX) {
+      NewOffsetSize = 8;
+    }
     PrevOffset = LocalOffset;
   }
 
@@ -451,7 +470,7 @@ void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings,
   uint64_t Size = CurStrOffsetSection.size();
   if (Version > 4) {
     while (Offset < Size) {
-      uint64_t HeaderSize = debugStrOffsetsHeaderSize(Data, Version);
+      const uint64_t HeaderSize = debugStrOffsetsHeaderSize(Data, Version);
       assert(HeaderSize <= Size - Offset &&
              "StrOffsetSection size is less than its header");
 
@@ -461,16 +480,52 @@ void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings,
       if (HeaderSize == 8) {
         ContributionSize = Data.getU32(&HeaderLengthOffset);
       } else if (HeaderSize == 16) {
+        OldOffsetSize = 8;
         HeaderLengthOffset += 4; // skip the dwarf64 marker
         ContributionSize = Data.getU64(&HeaderLengthOffset);
       }
       ContributionEnd = ContributionSize + HeaderLengthOffset;
-      Out.emitBytes(Data.getBytes(&Offset, HeaderSize));
-      writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, ContributionEnd);
+
+      StringRef HeaderBytes = Data.getBytes(&Offset, HeaderSize);
+      if (OldOffsetSize == 4 && NewOffsetSize == 8) {
+        // We had a DWARF32 .debug_str_offsets header, but we need to emit
+        // some string offsets that require 64 bit offsets on the .debug_str
+        // section. Emit the .debug_str_offsets header in DWARF64 format so we
+        // can emit string offsets that exceed UINT32_MAX without truncating
+        // the string offset.
+
+        // 2 bytes for DWARF version, 2 bytes pad.
+        const uint64_t VersionPadSize = 4;
+        const uint64_t NewLength =
+            (ContributionSize - VersionPadSize) * 2 + VersionPadSize;
+        // Emit the DWARF64 length that starts with a 4 byte DW_LENGTH_DWARF64
+        // value followed by the 8 byte updated length.
+        Out.emitIntValue(llvm::dwarf::DW_LENGTH_DWARF64, 4);
+        Out.emitIntValue(NewLength, 8);
+        // Emit DWARF version as a 2 byte integer.
+        Out.emitIntValue(Version, 2);
+        // Emit 2 bytes of padding.
+        Out.emitIntValue(0, 2);
+        // Update the .debug_str_offsets section length contribution for this
+        // .dwo file.
+        for (auto &Pair : SectionLength) {
+          if (Pair.first == DW_SECT_STR_OFFSETS) {
+            Pair.second = NewLength + 12;
+            break;
+          }
+        }
+      } else {
+        // Just emit the same .debug_str_offsets header.
+        Out.emitBytes(HeaderBytes);
+      }
+      writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, ContributionEnd,
+                        OldOffsetSize, NewOffsetSize);
     }
 
   } else {
-    writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, Size);
+    assert(OldOffsetSize == NewOffsetSize);
+    writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, Size, OldOffsetSize,
+                      NewOffsetSize);
   }
 }
 
@@ -562,7 +617,7 @@ Error handleSection(
     std::vector<StringRef> &CurTypesSection,
     std::vector<StringRef> &CurInfoSection, StringRef &AbbrevSection,
     StringRef &CurCUIndexSection, StringRef &CurTUIndexSection,
-    std::vector<std::pair<DWARFSectionKind, uint32_t>> &SectionLength) {
+    SectionLengths &SectionLength) {
   if (Section.isBSS())
     return Error::success();
 
@@ -620,7 +675,8 @@ Error handleSection(
 }
 
 Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
-            OnCuIndexOverflow OverflowOptValue) {
+            OnCuIndexOverflow OverflowOptValue,
+            Dwarf64StrOffsetsPromotion StrOffsetsOptValue) {
   const auto &MCOFI = *Out.getContext().getObjectFileInfo();
   MCSection *const StrSection = MCOFI.getDwarfStrDWOSection();
   MCSection *const StrOffsetSection = MCOFI.getDwarfStrOffDWOSection();
@@ -684,7 +740,7 @@ Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
     // This maps each section contained in this file to its length.
     // This information is later on used to calculate the contributions,
     // i.e. offset and length, of each compile/type unit to a section.
-    std::vector<std::pair<DWARFSectionKind, uint32_t>> SectionLength;
+    SectionLengths SectionLength;
 
     for (const auto &Section : Obj.sections())
       if (auto Err = handleSection(
@@ -713,7 +769,8 @@ Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
     }
 
     writeStringsAndOffsets(Out, Strings, StrOffsetSection, CurStrSection,
-                           CurStrOffsetSection, Header.Version);
+                           CurStrOffsetSection, Header.Version, SectionLength,
+                           StrOffsetsOptValue);
 
     for (auto Pair : SectionLength) {
       auto Index = getContributionIndex(Pair.first, IndexVersion);

diff  --git a/llvm/test/tools/llvm-dwp/X86/dwarf64-str-offsets.test b/llvm/test/tools/llvm-dwp/X86/dwarf64-str-offsets.test
new file mode 100644
index 0000000000000..26f7acae70aeb
--- /dev/null
+++ b/llvm/test/tools/llvm-dwp/X86/dwarf64-str-offsets.test
@@ -0,0 +1,81 @@
+# This test verifies that llvm-dwp can successfully promote .debug_str_offsets
+# to DWARF64. We do this by passing the testing-only option value
+# "--dwarf64-str-offsets-promotion=always" to llvm-dwp, which forces a DWARF32
+# version of .debug_str_offsets to be emitted as a DWARF64 version even though
+# no string offset exceeds UINT32_MAX. This allows us to test the functionality
+# without having to create a 4GB .dwo file.
+
+# RUN: yaml2obj %s -o %t.dwo
+# RUN: llvm-dwp %t.dwo -o %t.dwp
+# RUN: llvm-dwp %t.dwo -o %t.default.dwp --dwarf64-str-offsets-promotion
+# RUN: llvm-dwp %t.dwo -o %t.disabled.dwp --dwarf64-str-offsets-promotion=disabled
+# RUN: llvm-dwp %t.dwo -o %t.enabled.dwp --dwarf64-str-offsets-promotion=enabled
+# RUN: llvm-dwp %t.dwo -o %t.always.dwp --dwarf64-str-offsets-promotion=always
+# RUN: not llvm-dwp %t.dwo -o %t.invalid.dwp --dwarf64-str-offsets-promotion=invalid 2>&1 | FileCheck --check-prefixes=ERROR %s
+# RUN: llvm-dwarfdump --debug-str-offsets %t.dwp | FileCheck --check-prefixes=DWARF32 %s
+# RUN: llvm-dwarfdump --debug-str-offsets %t.default.dwp | FileCheck --check-prefixes=DWARF32 %s
+# RUN: llvm-dwarfdump --debug-str-offsets %t.disabled.dwp | FileCheck --check-prefixes=DWARF32 %s
+# RUN: llvm-dwarfdump --debug-str-offsets %t.enabled.dwp | FileCheck --check-prefixes=DWARF32 %s
+# RUN: llvm-dwarfdump --debug-str-offsets %t.always.dwp | FileCheck --check-prefixes=DWARF64 %s
+
+# DWARF32:      .debug_str_offsets.dwo contents:
+# DWARF32-NEXT: 0x00000000: Contribution size = 36, Format = DWARF32, Version = 5
+# DWARF32-NEXT: 0x00000008: 00000000 "main"
+# DWARF32-NEXT: 0x0000000c: 00000005 "int"
+# DWARF32-NEXT: 0x00000010: 00000009 "argc"
+# DWARF32-NEXT: 0x00000014: 0000000e "argv"
+# DWARF32-NEXT: 0x00000018: 00000013 "char"
+# DWARF32-NEXT: 0x0000001c: 00000018 "Apple clang version 17.0.0 (clang-1700.4.4.1)"
+# DWARF32-NEXT: 0x00000020: 00000046 "simple.cpp"
+# DWARF32-NEXT: 0x00000024: 00000051 "simple.dwo"
+
+# DWARF64:      .debug_str_offsets.dwo contents:
+# DWARF64-NEXT: 0x00000000: Contribution size = 68, Format = DWARF64, Version = 5
+# DWARF64-NEXT: 0x00000010: 0000000000000000 "main"
+# DWARF64-NEXT: 0x00000018: 0000000000000005 "int"
+# DWARF64-NEXT: 0x00000020: 0000000000000009 "argc"
+# DWARF64-NEXT: 0x00000028: 000000000000000e "argv"
+# DWARF64-NEXT: 0x00000030: 0000000000000013 "char"
+# DWARF64-NEXT: 0x00000038: 0000000000000018 "Apple clang version 17.0.0 (clang-1700.4.4.1)"
+# DWARF64-NEXT: 0x00000040: 0000000000000046 "simple.cpp"
+# DWARF64-NEXT: 0x00000048: 0000000000000051 "simple.dwo"
+
+# ERROR: invalid value for --dwarf64-str-offsets-promotion. Valid values are one of: "enabled", "disabled" or "always".
+
+--- !ELF
+FileHeader:
+  Class:           ELFCLASS64
+  Data:            ELFDATA2LSB
+  Type:            ET_REL
+  Machine:         EM_X86_64
+  SectionHeaderStringTable: .strtab
+Sections:
+  - Name:            .debug_str_offsets.dwo
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_EXCLUDE ]
+    AddressAlign:    0x1
+    Content:         '24000000050000000000000005000000090000000E00000013000000180000004600000051000000'
+  - Name:            .debug_str.dwo
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_EXCLUDE, SHF_MERGE, SHF_STRINGS ]
+    AddressAlign:    0x1
+    EntSize:         0x1
+    Content:         6D61696E00696E74006172676300617267760063686172004170706C6520636C616E672076657273696F6E2031372E302E302028636C616E672D313730302E342E342E31290073696D706C652E6370700073696D706C652E64776F00
+  - Name:            .debug_info.dwo
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_EXCLUDE ]
+    AddressAlign:    0x1
+    Content:         540000000500050800000000031DD228762F8E1C0105210006070200190000000156000001400000000302917802000140000000030291700300014400000000040105040549000000054E00000006530000000404060100
+  - Name:            .debug_abbrev.dwo
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_EXCLUDE ]
+    AddressAlign:    0x1
+    Content:         01110125251305032576250000022E01111B1206401803253A0B3B0B49133F190000030500021803253A0B3B0B4913000004240003253E0B0B0B0000050F00491300000626004913000000
+  - Type:            SectionHeaderTable
+    Sections:
+      - Name:            .strtab
+      - Name:            .debug_str_offsets.dwo
+      - Name:            .debug_str.dwo
+      - Name:            .debug_info.dwo
+      - Name:            .debug_abbrev.dwo
+...

diff  --git a/llvm/tools/llvm-dwp/Opts.td b/llvm/tools/llvm-dwp/Opts.td
index 46593bc40ebae..d4474ac073fd7 100644
--- a/llvm/tools/llvm-dwp/Opts.td
+++ b/llvm/tools/llvm-dwp/Opts.td
@@ -16,3 +16,18 @@ def continueOnCuIndexOverflow_EQ : Joined<["-", "--"], "continue-on-cu-index-ove
     "\t\ttruncated but valid DWP file, discarding any DWO files that would not fit within \n"
     "\t\tthe 32 bit/4GB limits of the format.">,
   Values<"continue,soft-stop">;
+
+def dwarf64StringOffsets : Flag<["-", "--"], "dwarf64-str-offsets-promotion">;
+def dwarf64StringOffsets_EQ
+    : Joined<["-", "--"], "dwarf64-str-offsets-promotion=">,
+      HelpText<"default = enabled, This allows .debug_str tables to exceed the "
+               "4GB limit\n"
+               "and have any DWARF32 .debug_str_offsets tables converted to "
+               "DWARF64 only for tables\n"
+               "that require 64 bit string offsets. = disabled, This setting "
+               "doesn't convert DWARF32\n"
+               ".debug_str_offsets tables in .dwo files to DWARF64 in the .dwp "
+               "file. = always, This\n"
+               "forces all .debug_str_offsets tables to be emitted as DWARF64. "
+               "This is used for testing.">,
+      Values<"disabled,enabled,always">;

diff  --git a/llvm/tools/llvm-dwp/llvm-dwp.cpp b/llvm/tools/llvm-dwp/llvm-dwp.cpp
index 31bad2d68982b..2892450398bb6 100644
--- a/llvm/tools/llvm-dwp/llvm-dwp.cpp
+++ b/llvm/tools/llvm-dwp/llvm-dwp.cpp
@@ -125,6 +125,9 @@ int llvm_dwp_main(int argc, char **argv, const llvm::ToolContext &) {
   llvm::BumpPtrAllocator A;
   llvm::StringSaver Saver{A};
   OnCuIndexOverflow OverflowOptValue = OnCuIndexOverflow::HardStop;
+  Dwarf64StrOffsetsPromotion Dwarf64StrOffsetsValue =
+      Dwarf64StrOffsetsPromotion::Disabled;
+
   opt::InputArgList Args =
       Tbl.parseArgs(argc, argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) {
         llvm::errs() << Msg << '\n';
@@ -161,6 +164,27 @@ int llvm_dwp_main(int argc, char **argv, const llvm::ToolContext &) {
     }
   }
 
+  if (Arg *Arg = Args.getLastArg(OPT_dwarf64StringOffsets,
+                                 OPT_dwarf64StringOffsets_EQ)) {
+    if (Arg->getOption().matches(OPT_dwarf64StringOffsets)) {
+      Dwarf64StrOffsetsValue = Dwarf64StrOffsetsPromotion::Enabled;
+    } else {
+      std::string OptValue = Arg->getValue();
+      if (OptValue == "disabled") {
+        Dwarf64StrOffsetsValue = Dwarf64StrOffsetsPromotion::Disabled;
+      } else if (OptValue == "enabled") {
+        Dwarf64StrOffsetsValue = Dwarf64StrOffsetsPromotion::Enabled;
+      } else if (OptValue == "always") {
+        Dwarf64StrOffsetsValue = Dwarf64StrOffsetsPromotion::Always;
+      } else {
+        llvm::errs()
+            << "invalid value for --dwarf64-str-offsets-promotion. Valid "
+               "values are one of: \"enabled\", \"disabled\" or \"always\".\n";
+        exit(1);
+      }
+    }
+  }
+
   for (const llvm::opt::Arg *A : Args.filtered(OPT_execFileNames))
     ExecFilenames.emplace_back(A->getValue());
 
@@ -274,7 +298,8 @@ int llvm_dwp_main(int argc, char **argv, const llvm::ToolContext &) {
   if (!MS)
     return error("no object streamer for target " + TripleName, Context);
 
-  if (auto Err = write(*MS, DWOFilenames, OverflowOptValue)) {
+  if (auto Err =
+          write(*MS, DWOFilenames, OverflowOptValue, Dwarf64StrOffsetsValue)) {
     logAllUnhandledErrors(std::move(Err), WithColor::error());
     return 1;
   }


        


More information about the llvm-commits mailing list