[Lldb-commits] [lldb] [llvm] Modify llvm-dwp to be able to emit string tables over 4GB without losing data (PR #167457)

Greg Clayton via lldb-commits lldb-commits at lists.llvm.org
Mon Nov 17 17:02:12 PST 2025


https://github.com/clayborg updated https://github.com/llvm/llvm-project/pull/167457

>From fccf37fcddd29a20744796e9f03a580dfa1b0f4a Mon Sep 17 00:00:00 2001
From: Greg Clayton <clayborg at gmail.com>
Date: Mon, 10 Nov 2025 21:22:39 -0800
Subject: [PATCH 1/9] Modify llvm-dwp to be able to emit string tables over 4GB
 without losing data.

We can change llvm-dwp to emit a DWARF64 version of the .debug_str_offsets table for each .dwo file in a .dwp file. This allows the string table to exceed 4GB without truncating string offsets into the .debug_str section and losing data. llvm-dwp appends all strings for a .dwo file to the .debug_str section, and if any of the new string offsets exceeds UINT32_MAX, it upgrades that .dwo file's .debug_str_offsets table to a DWARF64 header so that each string offset in that table can be emitted as a 64-bit offset.
---
 .../Plugins/SymbolFile/DWARF/DWARFUnit.cpp    |  4 +
 llvm/include/llvm/DWP/DWP.h                   |  4 +-
 llvm/include/llvm/DWP/DWPStringPool.h         |  6 +-
 llvm/lib/DWP/DWP.cpp                          | 78 +++++++++++++++----
 4 files changed, 72 insertions(+), 20 deletions(-)

diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
index b78e6ce807bca..4a3dad2385c2c 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
@@ -376,6 +376,10 @@ void DWARFUnit::SetDwoStrOffsetsBase() {
 
     // Skip padding.
     baseOffset += 2;
+  } else {
+    // Size of offset for .debug_str_offsets is same as DWARF offset byte size
+    // of the DWARFUnit for DWARF version 4 and earlier.
+    m_str_offsets_size = m_header.getDwarfOffsetByteSize();
   }
 
   SetStrOffsetsBase(baseOffset);
diff --git a/llvm/include/llvm/DWP/DWP.h b/llvm/include/llvm/DWP/DWP.h
index a759bae10d160..cc38369658eaa 100644
--- a/llvm/include/llvm/DWP/DWP.h
+++ b/llvm/include/llvm/DWP/DWP.h
@@ -70,6 +70,8 @@ struct CompileUnitIdentifiers {
 LLVM_ABI Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
                      OnCuIndexOverflow OverflowOptValue);
 
+typedef std::vector<std::pair<DWARFSectionKind, uint32_t>> SectionLengths;
+
 LLVM_ABI Error handleSection(
     const StringMap<std::pair<MCSection *, DWARFSectionKind>> &KnownSections,
     const MCSection *StrSection, const MCSection *StrOffsetSection,
@@ -82,7 +84,7 @@ LLVM_ABI Error handleSection(
     std::vector<StringRef> &CurTypesSection,
     std::vector<StringRef> &CurInfoSection, StringRef &AbbrevSection,
     StringRef &CurCUIndexSection, StringRef &CurTUIndexSection,
-    std::vector<std::pair<DWARFSectionKind, uint32_t>> &SectionLength);
+    SectionLengths &SectionLength);
 
 LLVM_ABI Expected<InfoSectionUnitHeader>
 parseInfoSectionUnitHeader(StringRef Info);
diff --git a/llvm/include/llvm/DWP/DWPStringPool.h b/llvm/include/llvm/DWP/DWPStringPool.h
index 1354b46f156b6..d1486ff7872e1 100644
--- a/llvm/include/llvm/DWP/DWPStringPool.h
+++ b/llvm/include/llvm/DWP/DWPStringPool.h
@@ -32,13 +32,13 @@ class DWPStringPool {
 
   MCStreamer &Out;
   MCSection *Sec;
-  DenseMap<const char *, uint32_t, CStrDenseMapInfo> Pool;
-  uint32_t Offset = 0;
+  DenseMap<const char *, uint64_t, CStrDenseMapInfo> Pool;
+  uint64_t Offset = 0;
 
 public:
   DWPStringPool(MCStreamer &Out, MCSection *Sec) : Out(Out), Sec(Sec) {}
 
-  uint32_t getOffset(const char *Str, unsigned Length) {
+  uint64_t getOffset(const char *Str, unsigned Length) {
     assert(strlen(Str) + 1 == Length && "Ensure length hint is correct");
 
     auto Pair = Pool.insert(std::make_pair(Str, Offset));
diff --git a/llvm/lib/DWP/DWP.cpp b/llvm/lib/DWP/DWP.cpp
index b565edbfe96db..54edce81208b5 100644
--- a/llvm/lib/DWP/DWP.cpp
+++ b/llvm/lib/DWP/DWP.cpp
@@ -413,33 +413,43 @@ Expected<InfoSectionUnitHeader> parseInfoSectionUnitHeader(StringRef Info) {
 }
 
 static void writeNewOffsetsTo(MCStreamer &Out, DataExtractor &Data,
-                              DenseMap<uint64_t, uint32_t> &OffsetRemapping,
-                              uint64_t &Offset, uint64_t &Size) {
+                              DenseMap<uint64_t, uint64_t> &OffsetRemapping,
+                              uint64_t &Offset, const uint64_t Size,
+                              uint32_t OldOffsetSize, uint32_t NewOffsetSize) {
 
   while (Offset < Size) {
-    auto OldOffset = Data.getU32(&Offset);
-    auto NewOffset = OffsetRemapping[OldOffset];
-    Out.emitIntValue(NewOffset, 4);
+    const uint64_t OldOffset = Data.getUnsigned(&Offset, OldOffsetSize);
+    const uint64_t NewOffset = OffsetRemapping[OldOffset];
+    assert(NewOffsetSize == 8 || NewOffset <= UINT32_MAX);
+    Out.emitIntValue(NewOffset, NewOffsetSize);
   }
 }
 
 void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings,
                             MCSection *StrOffsetSection,
                             StringRef CurStrSection,
-                            StringRef CurStrOffsetSection, uint16_t Version) {
+                            StringRef CurStrOffsetSection, uint16_t Version,
+                            SectionLengths &SectionLength) {
   // Could possibly produce an error or warning if one of these was non-null but
   // the other was null.
   if (CurStrSection.empty() || CurStrOffsetSection.empty())
     return;
 
-  DenseMap<uint64_t, uint32_t> OffsetRemapping;
+  DenseMap<uint64_t, uint64_t> OffsetRemapping;
 
   DataExtractor Data(CurStrSection, true, 0);
   uint64_t LocalOffset = 0;
   uint64_t PrevOffset = 0;
+
+  // Keep track if any new string offsets exceed UINT32_MAX. If any do, we can
+  // emit a DWARF64 .debug_str_offsets table for this compile unit.
+  uint32_t OldOffsetSize = 4;
+  uint32_t NewOffsetSize = 4;
   while (const char *S = Data.getCStr(&LocalOffset)) {
-    OffsetRemapping[PrevOffset] =
-        Strings.getOffset(S, LocalOffset - PrevOffset);
+    uint64_t NewOffset = Strings.getOffset(S, LocalOffset - PrevOffset);
+    OffsetRemapping[PrevOffset] = NewOffset;
+    if (NewOffset > UINT32_MAX)
+      NewOffsetSize = 8;
     PrevOffset = LocalOffset;
   }
 
@@ -451,7 +461,7 @@ void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings,
   uint64_t Size = CurStrOffsetSection.size();
   if (Version > 4) {
     while (Offset < Size) {
-      uint64_t HeaderSize = debugStrOffsetsHeaderSize(Data, Version);
+      const uint64_t HeaderSize = debugStrOffsetsHeaderSize(Data, Version);
       assert(HeaderSize <= Size - Offset &&
              "StrOffsetSection size is less than its header");
 
@@ -461,16 +471,52 @@ void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings,
       if (HeaderSize == 8) {
         ContributionSize = Data.getU32(&HeaderLengthOffset);
       } else if (HeaderSize == 16) {
+        OldOffsetSize = 8;
         HeaderLengthOffset += 4; // skip the dwarf64 marker
         ContributionSize = Data.getU64(&HeaderLengthOffset);
       }
       ContributionEnd = ContributionSize + HeaderLengthOffset;
-      Out.emitBytes(Data.getBytes(&Offset, HeaderSize));
-      writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, ContributionEnd);
+
+      StringRef HeaderBytes = Data.getBytes(&Offset, HeaderSize);
+      if (OldOffsetSize == 4 && NewOffsetSize == 8) {
+        // We had a DWARF32 .debug_str_offsets header, but we need to emit
+        // some string offsets that require 64 bit offsets on the .debug_str
+        // section. Emit the .debug_str_offsets header in DWARF64 format so we
+        // can emit string offsets that exceed UINT32_MAX without truncating
+        // the string offset.
+
+        // 2 bytes for DWARF version, 2 bytes pad.
+        const uint64_t VersionPadSize = 4;
+        const uint64_t NewLength =
+            (ContributionSize - VersionPadSize) * 2 + VersionPadSize;
+        // Emit the DWARF64 length that starts with a 4 byte DW_LENGTH_DWARF64
+        // value followed by the 8 byte updated length.
+        Out.emitIntValue(llvm::dwarf::DW_LENGTH_DWARF64, 4);
+        Out.emitIntValue(NewLength, 8);
+        // Emit DWARF version as a 2 byte integer.
+        Out.emitIntValue(Version, 2);
+        // Emit 2 bytes of padding.
+        Out.emitIntValue(0, 2);
+        // Update the .debug_str_offsets section length contribution for the
+        // this .dwo file.
+        for (auto &Pair : SectionLength) {
+          if (Pair.first == DW_SECT_STR_OFFSETS) {
+            Pair.second = NewLength + 12;
+            break;
+          }
+        }
+      } else {
+        // Just emit the same .debug_str_offsets header.
+        Out.emitBytes(HeaderBytes);
+      }
+      writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, ContributionEnd,
+                        OldOffsetSize, NewOffsetSize);
     }
 
   } else {
-    writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, Size);
+    assert(OldOffsetSize == NewOffsetSize);
+    writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, Size, OldOffsetSize,
+                      NewOffsetSize);
   }
 }
 
@@ -562,7 +608,7 @@ Error handleSection(
     std::vector<StringRef> &CurTypesSection,
     std::vector<StringRef> &CurInfoSection, StringRef &AbbrevSection,
     StringRef &CurCUIndexSection, StringRef &CurTUIndexSection,
-    std::vector<std::pair<DWARFSectionKind, uint32_t>> &SectionLength) {
+    SectionLengths &SectionLength) {
   if (Section.isBSS())
     return Error::success();
 
@@ -684,7 +730,7 @@ Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
     // This maps each section contained in this file to its length.
     // This information is later on used to calculate the contributions,
     // i.e. offset and length, of each compile/type unit to a section.
-    std::vector<std::pair<DWARFSectionKind, uint32_t>> SectionLength;
+    SectionLengths SectionLength;
 
     for (const auto &Section : Obj.sections())
       if (auto Err = handleSection(
@@ -713,7 +759,7 @@ Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
     }
 
     writeStringsAndOffsets(Out, Strings, StrOffsetSection, CurStrSection,
-                           CurStrOffsetSection, Header.Version);
+                           CurStrOffsetSection, Header.Version, SectionLength);
 
     for (auto Pair : SectionLength) {
       auto Index = getContributionIndex(Pair.first, IndexVersion);

>From 98b0ee5104841e6848fd5861e6a3c234d2f80ba1 Mon Sep 17 00:00:00 2001
From: Greg Clayton <clayborg at gmail.com>
Date: Fri, 14 Nov 2025 16:41:02 -0800
Subject: [PATCH 2/9] Merge with upstream and remove extra code that isn't
 needed.

---
 lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
index 4a3dad2385c2c..b78e6ce807bca 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
@@ -376,10 +376,6 @@ void DWARFUnit::SetDwoStrOffsetsBase() {
 
     // Skip padding.
     baseOffset += 2;
-  } else {
-    // Size of offset for .debug_str_offsets is same as DWARF offset byte size
-    // of the DWARFUnit for DWARF version 4 and earlier.
-    m_str_offsets_size = m_header.getDwarfOffsetByteSize();
   }
 
   SetStrOffsetsBase(baseOffset);

>From 604b5551e4cd5f82fff34cd8e6282b410bca1edd Mon Sep 17 00:00:00 2001
From: Greg Clayton <clayborg at gmail.com>
Date: Mon, 17 Nov 2025 11:22:39 -0800
Subject: [PATCH 3/9] Add new --force-dwarf64-str-offsets option to llvm-dwp
 and a test that uses it.

This patch adds a new llvm-dwp option that can be used in testing to verify that llvm-dwp can successfully upgrade a .debug_str_offsets table from DWARF32 to DWARF64.
---
 llvm/include/llvm/DWP/DWP.h                   |  3 +-
 llvm/lib/DWP/DWP.cpp                          | 15 ++--
 .../llvm-dwp/X86/dwarf64-str-offsets.test     | 72 +++++++++++++++++++
 llvm/tools/llvm-dwp/Opts.td                   |  4 ++
 llvm/tools/llvm-dwp/llvm-dwp.cpp              |  6 +-
 5 files changed, 93 insertions(+), 7 deletions(-)
 create mode 100644 llvm/test/tools/llvm-dwp/X86/dwarf64-str-offsets.test

diff --git a/llvm/include/llvm/DWP/DWP.h b/llvm/include/llvm/DWP/DWP.h
index cc38369658eaa..1ad4bbcefa988 100644
--- a/llvm/include/llvm/DWP/DWP.h
+++ b/llvm/include/llvm/DWP/DWP.h
@@ -68,7 +68,8 @@ struct CompileUnitIdentifiers {
 };
 
 LLVM_ABI Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
-                     OnCuIndexOverflow OverflowOptValue);
+                     OnCuIndexOverflow OverflowOptValue,
+                     bool ForceDwarf64StringOffsets);
 
 typedef std::vector<std::pair<DWARFSectionKind, uint32_t>> SectionLengths;
 
diff --git a/llvm/lib/DWP/DWP.cpp b/llvm/lib/DWP/DWP.cpp
index 54edce81208b5..a92cf2339506f 100644
--- a/llvm/lib/DWP/DWP.cpp
+++ b/llvm/lib/DWP/DWP.cpp
@@ -429,7 +429,8 @@ void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings,
                             MCSection *StrOffsetSection,
                             StringRef CurStrSection,
                             StringRef CurStrOffsetSection, uint16_t Version,
-                            SectionLengths &SectionLength) {
+                            SectionLengths &SectionLength,
+                            const bool ForceDwarf64StringOffsets) {
   // Could possibly produce an error or warning if one of these was non-null but
   // the other was null.
   if (CurStrSection.empty() || CurStrOffsetSection.empty())
@@ -442,9 +443,11 @@ void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings,
   uint64_t PrevOffset = 0;
 
   // Keep track if any new string offsets exceed UINT32_MAX. If any do, we can
-  // emit a DWARF64 .debug_str_offsets table for this compile unit.
+  // emit a DWARF64 .debug_str_offsets table for this compile unit. If the
+  // \a ForceDwarf64StringOffsets argument is true, then force the emission of
+  // DWARF64 .debug_str_offsets for testing.
   uint32_t OldOffsetSize = 4;
-  uint32_t NewOffsetSize = 4;
+  uint32_t NewOffsetSize = ForceDwarf64StringOffsets ? 8 : 4;
   while (const char *S = Data.getCStr(&LocalOffset)) {
     uint64_t NewOffset = Strings.getOffset(S, LocalOffset - PrevOffset);
     OffsetRemapping[PrevOffset] = NewOffset;
@@ -666,7 +669,8 @@ Error handleSection(
 }
 
 Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
-            OnCuIndexOverflow OverflowOptValue) {
+            OnCuIndexOverflow OverflowOptValue,
+            bool ForceDwarf64StringOffsets) {
   const auto &MCOFI = *Out.getContext().getObjectFileInfo();
   MCSection *const StrSection = MCOFI.getDwarfStrDWOSection();
   MCSection *const StrOffsetSection = MCOFI.getDwarfStrOffDWOSection();
@@ -759,7 +763,8 @@ Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
     }
 
     writeStringsAndOffsets(Out, Strings, StrOffsetSection, CurStrSection,
-                           CurStrOffsetSection, Header.Version, SectionLength);
+                           CurStrOffsetSection, Header.Version, SectionLength,
+                           ForceDwarf64StringOffsets);
 
     for (auto Pair : SectionLength) {
       auto Index = getContributionIndex(Pair.first, IndexVersion);
diff --git a/llvm/test/tools/llvm-dwp/X86/dwarf64-str-offsets.test b/llvm/test/tools/llvm-dwp/X86/dwarf64-str-offsets.test
new file mode 100644
index 0000000000000..f73461b349688
--- /dev/null
+++ b/llvm/test/tools/llvm-dwp/X86/dwarf64-str-offsets.test
@@ -0,0 +1,72 @@
+# This test tests that llvm-dwp can successfully promote .debug_str_offsets to
+# DWARF64. We do this by using a hidden option to llvm-dwp which is
+# "--force-dwarf64-str-offsets". This allows us to test if llvm-dwp can
+# successfully promote a DWARF32 version of .debug_str_offsets to a DWARF64
+# version. This allows us to test the functionality without having to create a
+# 4GB .dwo file.
+
+# RUN: yaml2obj %s -o %t.dwo
+# RUN: llvm-dwp %t.dwo -o %t.32.dwp
+# RUN: llvm-dwp %t.dwo -o %t.64.dwp --force-dwarf64-str-offsets
+# RUN: llvm-dwarfdump --debug-str-offsets %t.32.dwp | FileCheck --check-prefixes=DWARF32 %s
+# RUN: llvm-dwarfdump --debug-str-offsets %t.64.dwp | FileCheck --check-prefixes=DWARF64 %s
+
+# DWARF32:      .debug_str_offsets.dwo contents:
+# DWARF32-NEXT: 0x00000000: Contribution size = 36, Format = DWARF32, Version = 5
+# DWARF32-NEXT: 0x00000008: 00000000 "main"
+# DWARF32-NEXT: 0x0000000c: 00000005 "int"
+# DWARF32-NEXT: 0x00000010: 00000009 "argc"
+# DWARF32-NEXT: 0x00000014: 0000000e "argv"
+# DWARF32-NEXT: 0x00000018: 00000013 "char"
+# DWARF32-NEXT: 0x0000001c: 00000018 "Apple clang version 17.0.0 (clang-1700.4.4.1)"
+# DWARF32-NEXT: 0x00000020: 00000046 "simple.cpp"
+# DWARF32-NEXT: 0x00000024: 00000051 "simple.dwo"
+
+# DWARF64:      .debug_str_offsets.dwo contents:
+# DWARF64-NEXT: 0x00000000: Contribution size = 68, Format = DWARF64, Version = 5
+# DWARF64-NEXT: 0x00000010: 0000000000000000 "main"
+# DWARF64-NEXT: 0x00000018: 0000000000000005 "int"
+# DWARF64-NEXT: 0x00000020: 0000000000000009 "argc"
+# DWARF64-NEXT: 0x00000028: 000000000000000e "argv"
+# DWARF64-NEXT: 0x00000030: 0000000000000013 "char"
+# DWARF64-NEXT: 0x00000038: 0000000000000018 "Apple clang version 17.0.0 (clang-1700.4.4.1)"
+# DWARF64-NEXT: 0x00000040: 0000000000000046 "simple.cpp"
+# DWARF64-NEXT: 0x00000048: 0000000000000051 "simple.dwo"
+
+--- !ELF
+FileHeader:
+  Class:           ELFCLASS64
+  Data:            ELFDATA2LSB
+  Type:            ET_REL
+  Machine:         EM_X86_64
+  SectionHeaderStringTable: .strtab
+Sections:
+  - Name:            .debug_str_offsets.dwo
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_EXCLUDE ]
+    AddressAlign:    0x1
+    Content:         '24000000050000000000000005000000090000000E00000013000000180000004600000051000000'
+  - Name:            .debug_str.dwo
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_EXCLUDE, SHF_MERGE, SHF_STRINGS ]
+    AddressAlign:    0x1
+    EntSize:         0x1
+    Content:         6D61696E00696E74006172676300617267760063686172004170706C6520636C616E672076657273696F6E2031372E302E302028636C616E672D313730302E342E342E31290073696D706C652E6370700073696D706C652E64776F00
+  - Name:            .debug_info.dwo
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_EXCLUDE ]
+    AddressAlign:    0x1
+    Content:         540000000500050800000000031DD228762F8E1C0105210006070200190000000156000001400000000302917802000140000000030291700300014400000000040105040549000000054E00000006530000000404060100
+  - Name:            .debug_abbrev.dwo
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_EXCLUDE ]
+    AddressAlign:    0x1
+    Content:         01110125251305032576250000022E01111B1206401803253A0B3B0B49133F190000030500021803253A0B3B0B4913000004240003253E0B0B0B0000050F00491300000626004913000000
+  - Type:            SectionHeaderTable
+    Sections:
+      - Name:            .strtab
+      - Name:            .debug_str_offsets.dwo
+      - Name:            .debug_str.dwo
+      - Name:            .debug_info.dwo
+      - Name:            .debug_abbrev.dwo
+...
diff --git a/llvm/tools/llvm-dwp/Opts.td b/llvm/tools/llvm-dwp/Opts.td
index 46593bc40ebae..fddeb86fdae3c 100644
--- a/llvm/tools/llvm-dwp/Opts.td
+++ b/llvm/tools/llvm-dwp/Opts.td
@@ -16,3 +16,7 @@ def continueOnCuIndexOverflow_EQ : Joined<["-", "--"], "continue-on-cu-index-ove
     "\t\ttruncated but valid DWP file, discarding any DWO files that would not fit within \n"
     "\t\tthe 32 bit/4GB limits of the format.">,
   Values<"continue,soft-stop">;
+def forceDwarf64StringOffsets : Flag<["-", "--"], "force-dwarf64-str-offsets">,
+  Flags<[HelpHidden]>,
+  HelpText<"Force all .debug_str_offsets to be emitted as DWARF64 tables. This "
+    "option is used for testing.">;
diff --git a/llvm/tools/llvm-dwp/llvm-dwp.cpp b/llvm/tools/llvm-dwp/llvm-dwp.cpp
index 31bad2d68982b..f735ecac50608 100644
--- a/llvm/tools/llvm-dwp/llvm-dwp.cpp
+++ b/llvm/tools/llvm-dwp/llvm-dwp.cpp
@@ -73,6 +73,7 @@ class DwpOptTable : public opt::GenericOptTable {
 static std::vector<std::string> ExecFilenames;
 static std::string OutputFilename;
 static std::string ContinueOption;
+static bool ForceDwarf64StringOffsets = false;
 
 static Expected<SmallVector<std::string, 16>>
 getDWOFilenames(StringRef ExecFilename) {
@@ -160,6 +161,8 @@ int llvm_dwp_main(int argc, char **argv, const llvm::ToolContext &) {
       }
     }
   }
+  if (Args.getLastArg(OPT_forceDwarf64StringOffsets))
+    ForceDwarf64StringOffsets = true;
 
   for (const llvm::opt::Arg *A : Args.filtered(OPT_execFileNames))
     ExecFilenames.emplace_back(A->getValue());
@@ -274,7 +277,8 @@ int llvm_dwp_main(int argc, char **argv, const llvm::ToolContext &) {
   if (!MS)
     return error("no object streamer for target " + TripleName, Context);
 
-  if (auto Err = write(*MS, DWOFilenames, OverflowOptValue)) {
+  if (auto Err = write(*MS, DWOFilenames, OverflowOptValue,
+                       ForceDwarf64StringOffsets)) {
     logAllUnhandledErrors(std::move(Err), WithColor::error());
     return 1;
   }

>From 011c2c187c73d86bcd3c0e5065d6da8954e99ecd Mon Sep 17 00:00:00 2001
From: Greg Clayton <clayborg at gmail.com>
Date: Mon, 17 Nov 2025 14:51:21 -0800
Subject: [PATCH 4/9] Add a --dwarf64-str-offsets option value.

This option controls if llvm-dwp can promote a .debug_str_offsets table from DWARF32 to DWARF64.

Setting this option value to "enabled" allows promotion of a DWARF32 .debug_str_offsets table to DWARF64 only if any string offset in that .debug_str_offsets table exceeds UINT32_MAX.

Setting this option value to "disabled" (the default) keeps the pre-existing behavior, where each .debug_str_offsets table is emitted in the same format it has in its .dwo file.

Setting this option value to "always" forces all .debug_str_offsets tables to be emitted as DWARF64 tables. This is used for testing.

Removed the previous --force-dwarf64-str-offsets option.
---
 llvm/include/llvm/DWP/DWP.h                   |  8 +++++-
 llvm/lib/DWP/DWP.cpp                          | 18 ++++++++-----
 .../llvm-dwp/X86/dwarf64-str-offsets.test     | 17 +++++++++---
 llvm/tools/llvm-dwp/Opts.td                   | 14 +++++++---
 llvm/tools/llvm-dwp/llvm-dwp.cpp              | 27 ++++++++++++++++---
 5 files changed, 64 insertions(+), 20 deletions(-)

diff --git a/llvm/include/llvm/DWP/DWP.h b/llvm/include/llvm/DWP/DWP.h
index 1ad4bbcefa988..16c4f9d701072 100644
--- a/llvm/include/llvm/DWP/DWP.h
+++ b/llvm/include/llvm/DWP/DWP.h
@@ -22,6 +22,12 @@ enum OnCuIndexOverflow {
   Continue,
 };
 
+enum Dwarf64StrOffsets {
+  Disabled, ///< Don't do any conversion of .debug_str_offsets tables.
+  Enabled,  ///< Convert any .debug_str_offsets tables to DWARD64 if needed.
+  Always,   ///< Always emit .debug_str_offsets talbes as DWARF64 for testing.
+};
+
 struct UnitIndexEntry {
   DWARFUnitIndex::Entry::SectionContribution Contributions[8];
   std::string Name;
@@ -69,7 +75,7 @@ struct CompileUnitIdentifiers {
 
 LLVM_ABI Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
                      OnCuIndexOverflow OverflowOptValue,
-                     bool ForceDwarf64StringOffsets);
+                     Dwarf64StrOffsets StrOffsetsOptValue);
 
 typedef std::vector<std::pair<DWARFSectionKind, uint32_t>> SectionLengths;
 
diff --git a/llvm/lib/DWP/DWP.cpp b/llvm/lib/DWP/DWP.cpp
index a92cf2339506f..758401f33c576 100644
--- a/llvm/lib/DWP/DWP.cpp
+++ b/llvm/lib/DWP/DWP.cpp
@@ -430,7 +430,7 @@ void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings,
                             StringRef CurStrSection,
                             StringRef CurStrOffsetSection, uint16_t Version,
                             SectionLengths &SectionLength,
-                            const bool ForceDwarf64StringOffsets) {
+                            const Dwarf64StrOffsets StrOffsetsOptValue) {
   // Could possibly produce an error or warning if one of these was non-null but
   // the other was null.
   if (CurStrSection.empty() || CurStrOffsetSection.empty())
@@ -444,15 +444,19 @@ void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings,
 
   // Keep track if any new string offsets exceed UINT32_MAX. If any do, we can
   // emit a DWARF64 .debug_str_offsets table for this compile unit. If the
-  // \a ForceDwarf64StringOffsets argument is true, then force the emission of
-  // DWARF64 .debug_str_offsets for testing.
+  // \a StrOffsetsOptValue argument is Dwarf64StrOffsets::Always, then force
+  // the emission of DWARF64 .debug_str_offsets for testing.
   uint32_t OldOffsetSize = 4;
-  uint32_t NewOffsetSize = ForceDwarf64StringOffsets ? 8 : 4;
+  uint32_t NewOffsetSize =
+      StrOffsetsOptValue == Dwarf64StrOffsets::Always ? 8 : 4;
   while (const char *S = Data.getCStr(&LocalOffset)) {
     uint64_t NewOffset = Strings.getOffset(S, LocalOffset - PrevOffset);
     OffsetRemapping[PrevOffset] = NewOffset;
-    if (NewOffset > UINT32_MAX)
+    // Only promote the .debug_str_offsets to DWARF64 if our setting allows it.
+    if (StrOffsetsOptValue != Dwarf64StrOffsets::Disabled &&
+        NewOffset > UINT32_MAX) {
       NewOffsetSize = 8;
+    }
     PrevOffset = LocalOffset;
   }
 
@@ -670,7 +674,7 @@ Error handleSection(
 
 Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
             OnCuIndexOverflow OverflowOptValue,
-            bool ForceDwarf64StringOffsets) {
+            Dwarf64StrOffsets StrOffsetsOptValue) {
   const auto &MCOFI = *Out.getContext().getObjectFileInfo();
   MCSection *const StrSection = MCOFI.getDwarfStrDWOSection();
   MCSection *const StrOffsetSection = MCOFI.getDwarfStrOffDWOSection();
@@ -764,7 +768,7 @@ Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
 
     writeStringsAndOffsets(Out, Strings, StrOffsetSection, CurStrSection,
                            CurStrOffsetSection, Header.Version, SectionLength,
-                           ForceDwarf64StringOffsets);
+                           StrOffsetsOptValue);
 
     for (auto Pair : SectionLength) {
       auto Index = getContributionIndex(Pair.first, IndexVersion);
diff --git a/llvm/test/tools/llvm-dwp/X86/dwarf64-str-offsets.test b/llvm/test/tools/llvm-dwp/X86/dwarf64-str-offsets.test
index f73461b349688..0b75fb15d29fe 100644
--- a/llvm/test/tools/llvm-dwp/X86/dwarf64-str-offsets.test
+++ b/llvm/test/tools/llvm-dwp/X86/dwarf64-str-offsets.test
@@ -6,10 +6,17 @@
 # 4GB .dwo file.
 
 # RUN: yaml2obj %s -o %t.dwo
-# RUN: llvm-dwp %t.dwo -o %t.32.dwp
-# RUN: llvm-dwp %t.dwo -o %t.64.dwp --force-dwarf64-str-offsets
-# RUN: llvm-dwarfdump --debug-str-offsets %t.32.dwp | FileCheck --check-prefixes=DWARF32 %s
-# RUN: llvm-dwarfdump --debug-str-offsets %t.64.dwp | FileCheck --check-prefixes=DWARF64 %s
+# RUN: llvm-dwp %t.dwo -o %t.dwp
+# RUN: llvm-dwp %t.dwo -o %t.default.dwp --dwarf64-str-offsets
+# RUN: llvm-dwp %t.dwo -o %t.disabled.dwp --dwarf64-str-offsets=disabled
+# RUN: llvm-dwp %t.dwo -o %t.enabled.dwp --dwarf64-str-offsets=enabled
+# RUN: llvm-dwp %t.dwo -o %t.always.dwp --dwarf64-str-offsets=always
+# RUN: not llvm-dwp %t.dwo -o %t.invalid.dwp --dwarf64-str-offsets=invalid 2>&1 | FileCheck --check-prefixes=ERROR %s
+# RUN: llvm-dwarfdump --debug-str-offsets %t.dwp | FileCheck --check-prefixes=DWARF32 %s
+# RUN: llvm-dwarfdump --debug-str-offsets %t.default.dwp | FileCheck --check-prefixes=DWARF32 %s
+# RUN: llvm-dwarfdump --debug-str-offsets %t.disabled.dwp | FileCheck --check-prefixes=DWARF32 %s
+# RUN: llvm-dwarfdump --debug-str-offsets %t.enabled.dwp | FileCheck --check-prefixes=DWARF32 %s
+# RUN: llvm-dwarfdump --debug-str-offsets %t.always.dwp | FileCheck --check-prefixes=DWARF64 %s
 
 # DWARF32:      .debug_str_offsets.dwo contents:
 # DWARF32-NEXT: 0x00000000: Contribution size = 36, Format = DWARF32, Version = 5
@@ -33,6 +40,8 @@
 # DWARF64-NEXT: 0x00000040: 0000000000000046 "simple.cpp"
 # DWARF64-NEXT: 0x00000048: 0000000000000051 "simple.dwo"
 
+# ERROR: invalid value for --dwarf64-str-offsets. Valid values are one of: "enabled", "disabled" or "always".
+
 --- !ELF
 FileHeader:
   Class:           ELFCLASS64
diff --git a/llvm/tools/llvm-dwp/Opts.td b/llvm/tools/llvm-dwp/Opts.td
index fddeb86fdae3c..2f07227e8adba 100644
--- a/llvm/tools/llvm-dwp/Opts.td
+++ b/llvm/tools/llvm-dwp/Opts.td
@@ -16,7 +16,13 @@ def continueOnCuIndexOverflow_EQ : Joined<["-", "--"], "continue-on-cu-index-ove
     "\t\ttruncated but valid DWP file, discarding any DWO files that would not fit within \n"
     "\t\tthe 32 bit/4GB limits of the format.">,
   Values<"continue,soft-stop">;
-def forceDwarf64StringOffsets : Flag<["-", "--"], "force-dwarf64-str-offsets">,
-  Flags<[HelpHidden]>,
-  HelpText<"Force all .debug_str_offsets to be emitted as DWARF64 tables. This "
-    "option is used for testing.">;
+
+def dwarf64StringOffsets : Flag<["-", "--"], "dwarf64-str-offsets">;
+def dwarf64StringOffsets_EQ : Joined<["-", "--"], "dwarf64-str-offsets=">,
+  HelpText<"default = disabled, This setting doesn't convert DWARF32 .debug_str_offsets\n"
+    "tables in .dwo files to DWARF64 in the .dwp file. = enabled, This allows .debug_str\n"
+    "tables to exceed the 4GB limit and have any DWARF32 .debug_str_offsets tables\n"
+    "converted to DWARF64 only for tables that require 64 bit string offsets.\n"
+    "= always, This forces all .debug_str_offsets tables to be emitted as DWARF64.\n"
+    "This is used for testing.">,
+  Values<"disabled,enabled,always">;
diff --git a/llvm/tools/llvm-dwp/llvm-dwp.cpp b/llvm/tools/llvm-dwp/llvm-dwp.cpp
index f735ecac50608..546710f30dfad 100644
--- a/llvm/tools/llvm-dwp/llvm-dwp.cpp
+++ b/llvm/tools/llvm-dwp/llvm-dwp.cpp
@@ -73,7 +73,6 @@ class DwpOptTable : public opt::GenericOptTable {
 static std::vector<std::string> ExecFilenames;
 static std::string OutputFilename;
 static std::string ContinueOption;
-static bool ForceDwarf64StringOffsets = false;
 
 static Expected<SmallVector<std::string, 16>>
 getDWOFilenames(StringRef ExecFilename) {
@@ -126,6 +125,8 @@ int llvm_dwp_main(int argc, char **argv, const llvm::ToolContext &) {
   llvm::BumpPtrAllocator A;
   llvm::StringSaver Saver{A};
   OnCuIndexOverflow OverflowOptValue = OnCuIndexOverflow::HardStop;
+  Dwarf64StrOffsets Dwarf64StrOffsetsValue = Dwarf64StrOffsets::Disabled;
+
   opt::InputArgList Args =
       Tbl.parseArgs(argc, argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) {
         llvm::errs() << Msg << '\n';
@@ -161,8 +162,26 @@ int llvm_dwp_main(int argc, char **argv, const llvm::ToolContext &) {
       }
     }
   }
-  if (Args.getLastArg(OPT_forceDwarf64StringOffsets))
-    ForceDwarf64StringOffsets = true;
+
+  if (Arg *Arg = Args.getLastArg(OPT_dwarf64StringOffsets,
+                                 OPT_dwarf64StringOffsets_EQ)) {
+    if (Arg->getOption().matches(OPT_dwarf64StringOffsets)) {
+      Dwarf64StrOffsetsValue = Dwarf64StrOffsets::Enabled;
+    } else {
+      std::string OptValue = Arg->getValue();
+      if (OptValue == "disabled") {
+        Dwarf64StrOffsetsValue = Dwarf64StrOffsets::Disabled;
+      } else if (OptValue == "enabled") {
+        Dwarf64StrOffsetsValue = Dwarf64StrOffsets::Enabled;
+      } else if (OptValue == "always") {
+        Dwarf64StrOffsetsValue = Dwarf64StrOffsets::Always;
+      } else {
+        llvm::errs() << "invalid value for --dwarf64-str-offsets. Valid values "
+            "are one of: \"enabled\", \"disabled\" or \"always\".\n";
+        exit(1);
+      }
+    }
+  }
 
   for (const llvm::opt::Arg *A : Args.filtered(OPT_execFileNames))
     ExecFilenames.emplace_back(A->getValue());
@@ -278,7 +297,7 @@ int llvm_dwp_main(int argc, char **argv, const llvm::ToolContext &) {
     return error("no object streamer for target " + TripleName, Context);
 
   if (auto Err = write(*MS, DWOFilenames, OverflowOptValue,
-                       ForceDwarf64StringOffsets)) {
+                       Dwarf64StrOffsetsValue)) {
     logAllUnhandledErrors(std::move(Err), WithColor::error());
     return 1;
   }

>From 18707c5a5e1084bdea6599598aee1482def18a54 Mon Sep 17 00:00:00 2001
From: Greg Clayton <clayborg at gmail.com>
Date: Mon, 17 Nov 2025 14:56:13 -0800
Subject: [PATCH 5/9] Fix a typo.

---
 llvm/include/llvm/DWP/DWP.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/DWP/DWP.h b/llvm/include/llvm/DWP/DWP.h
index 16c4f9d701072..f04efd6bbe672 100644
--- a/llvm/include/llvm/DWP/DWP.h
+++ b/llvm/include/llvm/DWP/DWP.h
@@ -24,7 +24,7 @@ enum OnCuIndexOverflow {
 
 enum Dwarf64StrOffsets {
   Disabled, ///< Don't do any conversion of .debug_str_offsets tables.
-  Enabled,  ///< Convert any .debug_str_offsets tables to DWARD64 if needed.
+  Enabled,  ///< Convert any .debug_str_offsets tables to DWARF64 if needed.
   Always,   ///< Always emit .debug_str_offsets talbes as DWARF64 for testing.
 };
 

>From c3b53afe675659cac39dc7637b3f16732b6e43b7 Mon Sep 17 00:00:00 2001
From: Greg Clayton <clayborg at gmail.com>
Date: Mon, 17 Nov 2025 14:56:53 -0800
Subject: [PATCH 6/9] Clang format.

---
 llvm/tools/llvm-dwp/llvm-dwp.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/llvm/tools/llvm-dwp/llvm-dwp.cpp b/llvm/tools/llvm-dwp/llvm-dwp.cpp
index 546710f30dfad..b5038a1b34204 100644
--- a/llvm/tools/llvm-dwp/llvm-dwp.cpp
+++ b/llvm/tools/llvm-dwp/llvm-dwp.cpp
@@ -176,8 +176,9 @@ int llvm_dwp_main(int argc, char **argv, const llvm::ToolContext &) {
       } else if (OptValue == "always") {
         Dwarf64StrOffsetsValue = Dwarf64StrOffsets::Always;
       } else {
-        llvm::errs() << "invalid value for --dwarf64-str-offsets. Valid values "
-            "are one of: \"enabled\", \"disabled\" or \"always\".\n";
+        llvm::errs()
+            << "invalid value for --dwarf64-str-offsets. Valid values "
+               "are one of: \"enabled\", \"disabled\" or \"always\".\n";
         exit(1);
       }
     }
@@ -296,8 +297,8 @@ int llvm_dwp_main(int argc, char **argv, const llvm::ToolContext &) {
   if (!MS)
     return error("no object streamer for target " + TripleName, Context);
 
-  if (auto Err = write(*MS, DWOFilenames, OverflowOptValue,
-                       Dwarf64StrOffsetsValue)) {
+  if (auto Err =
+          write(*MS, DWOFilenames, OverflowOptValue, Dwarf64StrOffsetsValue)) {
     logAllUnhandledErrors(std::move(Err), WithColor::error());
     return 1;
   }

>From 6e65e72e614784c1553edd14056dc525ab0fe3aa Mon Sep 17 00:00:00 2001
From: Greg Clayton <clayborg at gmail.com>
Date: Mon, 17 Nov 2025 15:01:07 -0800
Subject: [PATCH 7/9] Fix help text to represent the right default value if
 --dwarf64-str-offsets is specified without a value.

---
 llvm/tools/llvm-dwp/Opts.td | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/llvm/tools/llvm-dwp/Opts.td b/llvm/tools/llvm-dwp/Opts.td
index 2f07227e8adba..33a4c19cb69d5 100644
--- a/llvm/tools/llvm-dwp/Opts.td
+++ b/llvm/tools/llvm-dwp/Opts.td
@@ -19,10 +19,9 @@ def continueOnCuIndexOverflow_EQ : Joined<["-", "--"], "continue-on-cu-index-ove
 
 def dwarf64StringOffsets : Flag<["-", "--"], "dwarf64-str-offsets">;
 def dwarf64StringOffsets_EQ : Joined<["-", "--"], "dwarf64-str-offsets=">,
-  HelpText<"default = disabled, This setting doesn't convert DWARF32 .debug_str_offsets\n"
-    "tables in .dwo files to DWARF64 in the .dwp file. = enabled, This allows .debug_str\n"
-    "tables to exceed the 4GB limit and have any DWARF32 .debug_str_offsets tables\n"
-    "converted to DWARF64 only for tables that require 64 bit string offsets.\n"
-    "= always, This forces all .debug_str_offsets tables to be emitted as DWARF64.\n"
-    "This is used for testing.">,
+  HelpText<"default = enabled, This allows .debug_str tables to exceed the 4GB limit\n"
+    "and have any DWARF32 .debug_str_offsets tables converted to DWARF64 only for tables\n"
+    "that require 64 bit string offsets. = disabled, This setting doesn't convert DWARF32\n"
+    " .debug_str_offsets tables in .dwo files to DWARF64 in the .dwp file. = always, This\n"
+    "forces all .debug_str_offsets tables to be emitted as DWARF64. This is used for testing.">,
   Values<"disabled,enabled,always">;

>From d4c35f696337dbf33013c88378d6cc3c588ca95e Mon Sep 17 00:00:00 2001
From: Greg Clayton <clayborg at gmail.com>
Date: Mon, 17 Nov 2025 15:03:26 -0800
Subject: [PATCH 8/9] More cleanup on help text for --dwarf64-str-offsets
 option.

---
 llvm/tools/llvm-dwp/Opts.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/tools/llvm-dwp/Opts.td b/llvm/tools/llvm-dwp/Opts.td
index 33a4c19cb69d5..c2e653cbe344d 100644
--- a/llvm/tools/llvm-dwp/Opts.td
+++ b/llvm/tools/llvm-dwp/Opts.td
@@ -22,6 +22,6 @@ def dwarf64StringOffsets_EQ : Joined<["-", "--"], "dwarf64-str-offsets=">,
   HelpText<"default = enabled, This allows .debug_str tables to exceed the 4GB limit\n"
     "and have any DWARF32 .debug_str_offsets tables converted to DWARF64 only for tables\n"
     "that require 64 bit string offsets. = disabled, This setting doesn't convert DWARF32\n"
-    " .debug_str_offsets tables in .dwo files to DWARF64 in the .dwp file. = always, This\n"
+    ".debug_str_offsets tables in .dwo files to DWARF64 in the .dwp file. = always, This\n"
     "forces all .debug_str_offsets tables to be emitted as DWARF64. This is used for testing.">,
   Values<"disabled,enabled,always">;

>From 96d894cbac608ae2daf0a4280b0ab4564d38f123 Mon Sep 17 00:00:00 2001
From: Greg Clayton <gclayton at fb.com>
Date: Mon, 17 Nov 2025 17:00:34 -0800
Subject: [PATCH 9/9] Fix llvm-dwp assertion errors.

I was creating an llvm-dwp file that had a .debug_str section that
exceeded 4GB and some asserts were firing.
---
 llvm/lib/DWP/DWP.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/DWP/DWP.cpp b/llvm/lib/DWP/DWP.cpp
index 758401f33c576..d32884858dd30 100644
--- a/llvm/lib/DWP/DWP.cpp
+++ b/llvm/lib/DWP/DWP.cpp
@@ -416,12 +416,15 @@ static void writeNewOffsetsTo(MCStreamer &Out, DataExtractor &Data,
                               DenseMap<uint64_t, uint64_t> &OffsetRemapping,
                               uint64_t &Offset, const uint64_t Size,
                               uint32_t OldOffsetSize, uint32_t NewOffsetSize) {
-
+  // Create a mask so we don't trigger an emitIntValue() assert below if the
+  // NewOffset is over 4GB.
+  const uint64_t NewOffsetMask = NewOffsetSize == 8 ? UINT64_MAX : UINT32_MAX;
   while (Offset < Size) {
     const uint64_t OldOffset = Data.getUnsigned(&Offset, OldOffsetSize);
     const uint64_t NewOffset = OffsetRemapping[OldOffset];
-    assert(NewOffsetSize == 8 || NewOffset <= UINT32_MAX);
-    Out.emitIntValue(NewOffset, NewOffsetSize);
+    // Truncate the string offset like the old llvm-dwp would have if we aren't
+    // promoting the .debug_str_offsets to DWARF64.
+    Out.emitIntValue(NewOffset & NewOffsetMask, NewOffsetSize);
   }
 }
 



More information about the lldb-commits mailing list