[Lldb-commits] [lldb] Enable LLDB to load large dSYM files. (PR #164471)

Greg Clayton via lldb-commits lldb-commits at lists.llvm.org
Fri Oct 24 16:00:19 PDT 2025


https://github.com/clayborg updated https://github.com/llvm/llvm-project/pull/164471

>From 1673e1b93b9c4e5a99323ca9bdf8232c6d5fdfad Mon Sep 17 00:00:00 2001
From: Greg Clayton <clayborg at gmail.com>
Date: Tue, 21 Oct 2025 11:19:30 -0700
Subject: [PATCH 1/6] Enable LLDB to load large dSYM files.

llvm-dsymutil can produce Mach-O files where some sections in __DWARF exceed the 4GB barrier, and subsequent sections in the dSYM will be inaccessible because the Mach-O section_64 structure only has a 32-bit file offset. This patch enables LLDB to load a large dSYM file by detecting when this happens and adjusting the file offsets of the affected LLDB sections accordingly.

I was unable to add a test as obj2yaml and yaml2obj are broken for mach-o files and they can't convert a yaml file back into a valid mach-o object file. Any suggestions for adding a test would be appreciated.
---
 .../ObjectFile/Mach-O/ObjectFileMachO.cpp     | 26 ++++++++++++++-----
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
index 9cdb8467bfc60..6878f7331e0f5 100644
--- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
+++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
@@ -1674,6 +1674,10 @@ void ObjectFileMachO::ProcessSegmentCommand(
   uint32_t segment_sect_idx;
   const lldb::user_id_t first_segment_sectID = context.NextSectionIdx + 1;
 
+  // dSYM files can create sections whose data exceeds the 4GB barrier, but
+  // mach-o sections only have 32 bit offsets. So keep track of when we
+  // overflow and fix the sections offsets as we iterate.
+  uint64_t section_offset_adjust = 0;
   const uint32_t num_u32s = load_cmd.cmd == LC_SEGMENT ? 7 : 8;
   for (segment_sect_idx = 0; segment_sect_idx < load_cmd.nsects;
        ++segment_sect_idx) {
@@ -1697,6 +1701,14 @@ void ObjectFileMachO::ProcessSegmentCommand(
     // isn't stored in the abstracted Sections.
     m_mach_sections.push_back(sect64);
 
+    // Make sure we can load dSYM files whose __DWARF sections exceed the 4GB
+    // barrier. llvm::MachO::section_64 have only 32 bit file offsets for the
+    // section contents.
+    const uint64_t section_file_offset = sect64.offset + section_offset_adjust;
+    // If this section overflows a 4GB barrier, then we need to adjust any
+    // subsequent the section offsets.
+    if (is_dsym && ((uint64_t)sect64.offset + sect64.size) >= UINT32_MAX)
+      section_offset_adjust += 0x100000000ull;
     if (add_section) {
       ConstString section_name(
           sect64.sectname, strnlen(sect64.sectname, sizeof(sect64.sectname)));
@@ -1736,13 +1748,13 @@ void ObjectFileMachO::ProcessSegmentCommand(
           }
 
           // Grow the section size as needed.
-          if (sect64.offset) {
+          if (section_file_offset) {
             const lldb::addr_t segment_min_file_offset =
                 segment->GetFileOffset();
             const lldb::addr_t segment_max_file_offset =
                 segment_min_file_offset + segment->GetFileSize();
 
-            const lldb::addr_t section_min_file_offset = sect64.offset;
+            const lldb::addr_t section_min_file_offset = section_file_offset;
             const lldb::addr_t section_max_file_offset =
                 section_min_file_offset + sect64.size;
             const lldb::addr_t new_file_offset =
@@ -1770,9 +1782,9 @@ void ObjectFileMachO::ProcessSegmentCommand(
               sect64.addr, // File VM address == addresses as they are
               // found in the object file
               sect64.size,   // VM size in bytes of this section
-              sect64.offset, // Offset to the data for this section in
+              section_file_offset, // Offset to the data for this section in
               // the file
-              sect64.offset ? sect64.size : 0, // Size in bytes of
+              section_file_offset ? sect64.size : 0, // Size in bytes of
               // this section as
               // found in the file
               sect64.align,
@@ -1792,14 +1804,14 @@ void ObjectFileMachO::ProcessSegmentCommand(
       SectionSP section_sp(new Section(
           segment_sp, module_sp, this, ++context.NextSectionIdx, section_name,
           sect_type, sect64.addr - segment_sp->GetFileAddress(), sect64.size,
-          sect64.offset, sect64.offset == 0 ? 0 : sect64.size, sect64.align,
-          sect64.flags));
+          section_file_offset, section_file_offset == 0 ? 0 : sect64.size,
+          sect64.align, sect64.flags));
       // Set the section to be encrypted to match the segment
 
       bool section_is_encrypted = false;
       if (!segment_is_encrypted && load_cmd.filesize != 0)
         section_is_encrypted = context.EncryptedRanges.FindEntryThatContains(
-                                   sect64.offset) != nullptr;
+                                   section_file_offset) != nullptr;
 
       section_sp->SetIsEncrypted(segment_is_encrypted || section_is_encrypted);
       section_sp->SetPermissions(segment_permissions);

>From 6dac04becfbb6cba6b7a404c2b73625de36b3c92 Mon Sep 17 00:00:00 2001
From: Greg Clayton <clayborg at gmail.com>
Date: Fri, 24 Oct 2025 11:40:58 -0700
Subject: [PATCH 2/6] Address review comments.

- Fix a case where a section can be larger than 4GB
- Fix comments to be a bit more clear
- Don't only do this for dSYM files
---
 .../ObjectFile/Mach-O/ObjectFileMachO.cpp     | 21 ++++++++++---------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
index 6878f7331e0f5..1040b58d767b6 100644
--- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
+++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
@@ -1674,9 +1674,9 @@ void ObjectFileMachO::ProcessSegmentCommand(
   uint32_t segment_sect_idx;
   const lldb::user_id_t first_segment_sectID = context.NextSectionIdx + 1;
 
-  // dSYM files can create sections whose data exceeds the 4GB barrier, but
-  // mach-o sections only have 32 bit offsets. So keep track of when we
-  // overflow and fix the sections offsets as we iterate.
+  // 64 bit mach-o files have sections with 32 bit file offsets. If any section
+  // data end will exceed UINT32_MAX, then we need to do some bookkeeping to
+  // ensure we can access this data correctly.
   uint64_t section_offset_adjust = 0;
   const uint32_t num_u32s = load_cmd.cmd == LC_SEGMENT ? 7 : 8;
   for (segment_sect_idx = 0; segment_sect_idx < load_cmd.nsects;
@@ -1701,14 +1701,15 @@ void ObjectFileMachO::ProcessSegmentCommand(
     // isn't stored in the abstracted Sections.
     m_mach_sections.push_back(sect64);
 
-    // Make sure we can load dSYM files whose __DWARF sections exceed the 4GB
-    // barrier. llvm::MachO::section_64 have only 32 bit file offsets for the
-    // section contents.
+    // Make sure we can load sections in mach-o files where some sections cross
+    // a 4GB boundary. llvm::MachO::section_64 have only 32 bit file offsets
+    // for the file offset of the section contents, so we need to track and
+    // sections that overflow and adjust the offsets accordingly.
     const uint64_t section_file_offset = sect64.offset + section_offset_adjust;
-    // If this section overflows a 4GB barrier, then we need to adjust any
-    // subsequent the section offsets.
-    if (is_dsym && ((uint64_t)sect64.offset + sect64.size) >= UINT32_MAX)
-      section_offset_adjust += 0x100000000ull;
+    const uint64_t end_section_offset = (uint64_t)sect64.offset + sect64.size;
+    if (end_section_offset >= UINT32_MAX)
+      section_offset_adjust += end_section_offset & 0xFFFFFFFF00000000ull;
+
     if (add_section) {
       ConstString section_name(
           sect64.sectname, strnlen(sect64.sectname, sizeof(sect64.sectname)));

>From 9fa860c9d40013c64bb2622ca9f934e8d45426de Mon Sep 17 00:00:00 2001
From: Greg Clayton <clayborg at gmail.com>
Date: Fri, 24 Oct 2025 11:45:13 -0700
Subject: [PATCH 3/6] Run clang format.

---
 lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
index 1040b58d767b6..caf2d66315748 100644
--- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
+++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
@@ -1782,7 +1782,7 @@ void ObjectFileMachO::ProcessSegmentCommand(
               // other sections.
               sect64.addr, // File VM address == addresses as they are
               // found in the object file
-              sect64.size,   // VM size in bytes of this section
+              sect64.size,         // VM size in bytes of this section
               section_file_offset, // Offset to the data for this section in
               // the file
               section_file_offset ? sect64.size : 0, // Size in bytes of

>From 8e448dad8736c69096c54355b55c3417c910afa9 Mon Sep 17 00:00:00 2001
From: Greg Clayton <clayborg at gmail.com>
Date: Fri, 24 Oct 2025 15:10:54 -0700
Subject: [PATCH 4/6] Cast to uint64_t to be clear what we are doing.

---
 lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
index caf2d66315748..29285facaa475 100644
--- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
+++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
@@ -1705,7 +1705,7 @@ void ObjectFileMachO::ProcessSegmentCommand(
     // a 4GB boundary. llvm::MachO::section_64 have only 32 bit file offsets
     // for the file offset of the section contents, so we need to track and
     // sections that overflow and adjust the offsets accordingly.
-    const uint64_t section_file_offset = sect64.offset + section_offset_adjust;
+    const uint64_t section_file_offset = (uint64_t)sect64.offset + section_offset_adjust;
     const uint64_t end_section_offset = (uint64_t)sect64.offset + sect64.size;
     if (end_section_offset >= UINT32_MAX)
       section_offset_adjust += end_section_offset & 0xFFFFFFFF00000000ull;

>From 01fda3979d187b76469a68ffd33a89358e17e9ec Mon Sep 17 00:00:00 2001
From: Greg Clayton <clayborg at gmail.com>
Date: Fri, 24 Oct 2025 15:11:51 -0700
Subject: [PATCH 5/6] clang format.

---
 lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
index 29285facaa475..c8e520d687f67 100644
--- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
+++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
@@ -1705,7 +1705,8 @@ void ObjectFileMachO::ProcessSegmentCommand(
     // a 4GB boundary. llvm::MachO::section_64 have only 32 bit file offsets
     // for the file offset of the section contents, so we need to track and
     // sections that overflow and adjust the offsets accordingly.
-    const uint64_t section_file_offset = (uint64_t)sect64.offset + section_offset_adjust;
+    const uint64_t section_file_offset =
+        (uint64_t)sect64.offset + section_offset_adjust;
     const uint64_t end_section_offset = (uint64_t)sect64.offset + sect64.size;
     if (end_section_offset >= UINT32_MAX)
       section_offset_adjust += end_section_offset & 0xFFFFFFFF00000000ull;

>From 1be09602233ccdd7642aa63b9290f953277c37fb Mon Sep 17 00:00:00 2001
From: Greg Clayton <clayborg at gmail.com>
Date: Fri, 24 Oct 2025 15:58:02 -0700
Subject: [PATCH 6/6] Add a test.

The binary is as minimal as possible and it contains 1 segment named "__DWARF" with 3 sections:

FILE OFF    INDEX ADDRESS            SIZE               OFFSET     ALIGN      RELOFF     NRELOC     FLAGS      RESERVED1  RESERVED2  RESERVED3  NAME
=========== ===== ------------------ ------------------ ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ----------------------
0x00000068: [  1] 0x00000000fffffff0 0x0000000000000020 0xfffffff0 0x00000002 0x00000000 0x00000000 0x00000001 0x00000000 0x00000000 0x00000000 __DWARF.__debug_abbrev
0x000000b8: [  2] 0x0000000100000010 0x0000000200000000 0x00000010 0x00000002 0x00000000 0x00000000 0x00000001 0x00000000 0x00000000 0x00000000 __DWARF.__debug_info
0x00000108: [  3] 0x0000000300000010 0x0000000000000020 0x00000010 0x00000002 0x00000000 0x00000000 0x00000001 0x00000000 0x00000000 0x00000000 __DWARF.__debug_line

The file offsets should be parsed correctly by LLDB as:

__debug_abbrev file_offset=0x00000000fffffff0
__debug_info file_offset=0x0000000100000010
__debug_line file_offset=0x0000000300000010
---
 .../MachO/Inputs/section-overflow-binary          | Bin 0 -> 344 bytes
 .../ObjectFile/MachO/section-overflow-binary.test |  13 +++++++++++++
 2 files changed, 13 insertions(+)
 create mode 100644 lldb/test/Shell/ObjectFile/MachO/Inputs/section-overflow-binary
 create mode 100644 lldb/test/Shell/ObjectFile/MachO/section-overflow-binary.test

diff --git a/lldb/test/Shell/ObjectFile/MachO/Inputs/section-overflow-binary b/lldb/test/Shell/ObjectFile/MachO/Inputs/section-overflow-binary
new file mode 100644
index 0000000000000000000000000000000000000000..19dc2f4ac9ffe55c414b1f37817099f07846ad00
GIT binary patch
literal 344
zcmX^A>+L at t1_nk3Am9RG5W@mUv@$3FSqu!4Kn&u?$Ge0(2DyR7K*Wdt|Np}{Oh|kO
zs31F#W&>hoC_g?vB{iuuJw7ohsVKD!w|NRs^&s;>=E4BTUC01tUS?ieK1dwheF8u+
em>!ThAU-kX<z(ii0+nJl59AjGkR%WgG7kV(1ucvK

literal 0
HcmV?d00001

diff --git a/lldb/test/Shell/ObjectFile/MachO/section-overflow-binary.test b/lldb/test/Shell/ObjectFile/MachO/section-overflow-binary.test
new file mode 100644
index 0000000000000..76c335f65a76a
--- /dev/null
+++ b/lldb/test/Shell/ObjectFile/MachO/section-overflow-binary.test
@@ -0,0 +1,13 @@
+RUN: %lldb -b %p/Inputs/section-overflow-binary \
+RUN:   -o 'script dwarf = lldb.target.module[0].sections[0]' \
+RUN:   -o 'script section = dwarf.GetSubSectionAtIndex(0)' \
+RUN:   -o "script print(f'{section.GetName()} file_offset=0x{section.GetFileOffset():016x}')" \
+RUN:   -o 'script section = dwarf.GetSubSectionAtIndex(1)' \
+RUN:   -o "script print(f'{section.GetName()} file_offset=0x{section.GetFileOffset():016x}')" \
+RUN:   -o 'script section = dwarf.GetSubSectionAtIndex(2)' \
+RUN:   -o "script print(f'{section.GetName()} file_offset=0x{section.GetFileOffset():016x}')" \
+RUN:   | FileCheck %s
+
+CHECK: __debug_abbrev file_offset=0x00000000fffffff0
+CHECK: __debug_info file_offset=0x0000000100000010
+CHECK: __debug_line file_offset=0x0000000300000010



More information about the lldb-commits mailing list