[llvm] Fix getting section info in large mach-o files. (PR #165940)

Greg Clayton via llvm-commits llvm-commits at lists.llvm.org
Sun Nov 2 16:13:55 PST 2025


https://github.com/clayborg updated https://github.com/llvm/llvm-project/pull/165940

>From 159cfd761e09e61949b9abf2b63ac756f6bf0fdf Mon Sep 17 00:00:00 2001
From: Greg Clayton <clayborg at gmail.com>
Date: Fri, 31 Oct 2025 16:26:12 -0700
Subject: [PATCH 1/2] Fix getting section info in large mach-o files.

Mach-O has 32-bit file offsets in the MachO::section_64 structs. dSYM files can contain sections whose start offset exceeds UINT32_MAX, which means MachO::section_64.offset will be truncated. We can detect when this happens and adjust the section offset to be 64-bit safe. This lets tools get the correct section contents for large dSYM files and allows tools that parse DWARF, like llvm-gsymutil, to load and convert these files correctly.
---
 llvm/include/llvm/Object/MachO.h    |  2 +-
 llvm/lib/Object/MachOObjectFile.cpp | 18 ++++++++++++++++--
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/Object/MachO.h b/llvm/include/llvm/Object/MachO.h
index 01e7c6b07dd36..f4c1e30b097ee 100644
--- a/llvm/include/llvm/Object/MachO.h
+++ b/llvm/include/llvm/Object/MachO.h
@@ -447,7 +447,7 @@ class LLVM_ABI MachOObjectFile : public ObjectFile {
   uint64_t getSectionAddress(DataRefImpl Sec) const override;
   uint64_t getSectionIndex(DataRefImpl Sec) const override;
   uint64_t getSectionSize(DataRefImpl Sec) const override;
-  ArrayRef<uint8_t> getSectionContents(uint32_t Offset, uint64_t Size) const;
+  ArrayRef<uint8_t> getSectionContents(uint64_t Offset, uint64_t Size) const;
   Expected<ArrayRef<uint8_t>>
   getSectionContents(DataRefImpl Sec) const override;
   uint64_t getSectionAlignment(DataRefImpl Sec) const override;
diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp
index e09dc947c2779..300a5f7ed2a48 100644
--- a/llvm/lib/Object/MachOObjectFile.cpp
+++ b/llvm/lib/Object/MachOObjectFile.cpp
@@ -1978,20 +1978,34 @@ uint64_t MachOObjectFile::getSectionSize(DataRefImpl Sec) const {
   return SectSize;
 }
 
-ArrayRef<uint8_t> MachOObjectFile::getSectionContents(uint32_t Offset,
+ArrayRef<uint8_t> MachOObjectFile::getSectionContents(uint64_t Offset,
                                                       uint64_t Size) const {
   return arrayRefFromStringRef(getData().substr(Offset, Size));
 }
 
 Expected<ArrayRef<uint8_t>>
 MachOObjectFile::getSectionContents(DataRefImpl Sec) const {
-  uint32_t Offset;
+  uint64_t Offset;
   uint64_t Size;
 
   if (is64Bit()) {
     MachO::section_64 Sect = getSection64(Sec);
     Offset = Sect.offset;
     Size = Sect.size;
+    // Check for large mach-o files where the section contents might exceed
+    // 4GB. MachO::section_64 objects only have 32 bit file offsets to the
+    // section contents and can overflow in dSYM files. We can track this and
+    // adjust the section offset to be 64 bit safe.
+    uint64_t SectOffsetAdjust = 0;
+    for (uint32_t SectIdx=0; SectIdx<Sec.d.a; ++SectIdx) {
+      MachO::section_64 CurrSect =
+          getStruct<MachO::section_64>(*this, Sections[SectIdx]);
+      const uint64_t EndSectFileOffset =
+          (uint64_t)CurrSect.offset + CurrSect.size;
+      if (EndSectFileOffset >= UINT32_MAX)
+        SectOffsetAdjust += EndSectFileOffset & 0xFFFFFFFF00000000ull;
+    }
+    Offset += SectOffsetAdjust;
   } else {
     MachO::section Sect = getSection(Sec);
     Offset = Sect.offset;

>From 457d287486e3494f435cfe1422eaf56c974f6ce2 Mon Sep 17 00:00:00 2001
From: Greg Clayton <clayborg at gmail.com>
Date: Sun, 2 Nov 2025 16:13:46 -0800
Subject: [PATCH 2/2] Update llvm/lib/Object/MachOObjectFile.cpp

Make sure sections are ordered when they cross the UINT32_MAX barrier

Co-authored-by: Peter Rong <peterrong96 at gmail.com>
---
 llvm/lib/Object/MachOObjectFile.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp
index 300a5f7ed2a48..6ebf0a845836f 100644
--- a/llvm/lib/Object/MachOObjectFile.cpp
+++ b/llvm/lib/Object/MachOObjectFile.cpp
@@ -1996,14 +1996,19 @@ MachOObjectFile::getSectionContents(DataRefImpl Sec) const {
     // 4GB. MachO::section_64 objects only have 32 bit file offsets to the
     // section contents and can overflow in dSYM files. We can track this and
     // adjust the section offset to be 64 bit safe.
+    // Assumes the sections are ordered.
+    uint64_t PrevTrueOffset = 0;
     uint64_t SectOffsetAdjust = 0;
     for (uint32_t SectIdx=0; SectIdx<Sec.d.a; ++SectIdx) {
       MachO::section_64 CurrSect =
           getStruct<MachO::section_64>(*this, Sections[SectIdx]);
+       uint64_t CurrTrueOffset = (uint64_t) CurrSect.offset | SectOffsetAdjust;
+      assert(SectOffsetAdjust == 0 || (PrevTrueOffset <= CurrTrueOffset) && "Overflowing sections must be ordered for adjustment")
       const uint64_t EndSectFileOffset =
           (uint64_t)CurrSect.offset + CurrSect.size;
       if (EndSectFileOffset >= UINT32_MAX)
         SectOffsetAdjust += EndSectFileOffset & 0xFFFFFFFF00000000ull;
+      PrevTrueOffset = CurrTrueOffset;
     }
     Offset += SectOffsetAdjust;
   } else {



More information about the llvm-commits mailing list