[Lldb-commits] [lldb] [lldb][Mach-O] Handle shared cache binaries correctly (PR #117832)

Jason Molenda via lldb-commits lldb-commits at lists.llvm.org
Wed Nov 27 13:43:09 PST 2024


https://github.com/jasonmolenda updated https://github.com/llvm/llvm-project/pull/117832

>From 00a429c14d159ebc42ac7c3a7e98a91851ece236 Mon Sep 17 00:00:00 2001
From: Jason Molenda <jmolenda at apple.com>
Date: Tue, 26 Nov 2024 17:56:06 -0800
Subject: [PATCH 1/3] [lldb][Mach-O] Handle shared cache binaries correctly

The Mach-O load commands have an LC_SYMTAB / struct symtab_command
which represents the offset of the symbol table (nlist records) and
string table for this binary.  In a mach-o binary on disk, these are
file offsets.  If a mach-o binary is loaded in memory with all
segments consecutive, the `symoff` and `stroff` are the offsets from
the TEXT segment (aka the mach-o header) virtual address to the
virtual address of the start of these tables.

However, if a Mach-O binary is a part of the shared cache, then the
segments will be separated -- they will have different slide values.
And it is possible for the LINKEDIT segment to be greater than 4GB
away from the TEXT segment in the virtual address space, so these
32-bit offsets cannot express the offset from TEXT segment to these
tables.

Create separate uint64_t variables to track the offset to the
symbol table and string table, instead of reusing the 32-bit ones
in the symtab_command structure.

rdar://140432279
---
 .../ObjectFile/Mach-O/ObjectFileMachO.cpp     | 26 ++++++++++++++-----
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
index 079fd905037d45..5f047d84d53e73 100644
--- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
+++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
@@ -2244,6 +2244,18 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
   // code.
   typedef AddressDataArray<lldb::addr_t, bool, 100> FunctionStarts;
 
+  // The virtual address offset from TEXT to the symbol/string tables
+  // in the LINKEDIT section.  The LC_SYMTAB symtab_command `symoff` and
+  // `stroff` are uint32_t's that give the file offset in the binary.
+  // If the binary is laid down in memory with all segments consecutive,
+  // then these are the offsets from the mach-o header aka TEXT segment
+  // to the tables' virtual addresses.
+  // But if the binary is loaded in virtual address space with different
+  // slides for the segments (e.g. a shared cache), the LINKEDIT may be
+  // more than 4GB away from TEXT, and a 32-bit offset is not sufficient.
+  offset_t symbol_table_offset_from_TEXT = 0;
+  offset_t string_table_offset_from_TEXT = 0;
+
   // Record the address of every function/data that we add to the symtab.
   // We add symbols to the table in the order of most information (nlist
   // records) to least (function starts), and avoid duplicating symbols
@@ -2282,6 +2294,8 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
       if (m_data.GetU32(&offset, &symtab_load_command.symoff, 4) ==
           nullptr) // fill in symoff, nsyms, stroff, strsize fields
         return;
+      string_table_offset_from_TEXT = symtab_load_command.stroff;
+      symbol_table_offset_from_TEXT = symtab_load_command.symoff;
       break;
 
     case LC_DYLD_INFO:
@@ -2403,9 +2417,9 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
 
       const addr_t linkedit_file_offset = linkedit_section_sp->GetFileOffset();
       const addr_t symoff_addr = linkedit_load_addr +
-                                 symtab_load_command.symoff -
+                                 symbol_table_offset_from_TEXT -
                                  linkedit_file_offset;
-      strtab_addr = linkedit_load_addr + symtab_load_command.stroff -
+      strtab_addr = linkedit_load_addr + string_table_offset_from_TEXT -
                     linkedit_file_offset;
 
       // Always load dyld - the dynamic linker - from memory if we didn't
@@ -2473,17 +2487,17 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
       lldb::addr_t linkedit_offset = linkedit_section_sp->GetFileOffset();
       lldb::offset_t linkedit_slide =
           linkedit_offset - m_linkedit_original_offset;
-      symtab_load_command.symoff += linkedit_slide;
-      symtab_load_command.stroff += linkedit_slide;
+      symbol_table_offset_from_TEXT += linkedit_slide;
+      string_table_offset_from_TEXT += linkedit_slide;
       dyld_info.export_off += linkedit_slide;
       dysymtab.indirectsymoff += linkedit_slide;
       function_starts_load_command.dataoff += linkedit_slide;
       exports_trie_load_command.dataoff += linkedit_slide;
     }
 
-    nlist_data.SetData(m_data, symtab_load_command.symoff,
+    nlist_data.SetData(m_data, symbol_table_offset_from_TEXT,
                        nlist_data_byte_size);
-    strtab_data.SetData(m_data, symtab_load_command.stroff,
+    strtab_data.SetData(m_data, string_table_offset_from_TEXT,
                         strtab_data_byte_size);
 
     // We shouldn't have exports data from both the LC_DYLD_INFO command

>From 5761360c2ec87187efe50e43c2ddd1dfe425e07a Mon Sep 17 00:00:00 2001
From: Jason Molenda <jmolenda at apple.com>
Date: Wed, 27 Nov 2024 13:35:49 -0800
Subject: [PATCH 2/3] Define a local version of symtab_command w/ larger
 offsets

---
 .../ObjectFile/Mach-O/ObjectFileMachO.cpp     | 36 ++++++-------------
 .../ObjectFile/Mach-O/ObjectFileMachO.h       | 13 +++++++
 2 files changed, 24 insertions(+), 25 deletions(-)

diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
index 5f047d84d53e73..da87491fa32af4 100644
--- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
+++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
@@ -2235,27 +2235,15 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
   LLDB_LOG(log, "Parsing symbol table for {0}", file_name);
   Progress progress("Parsing symbol table", file_name);
 
-  llvm::MachO::symtab_command symtab_load_command = {0, 0, 0, 0, 0, 0};
   llvm::MachO::linkedit_data_command function_starts_load_command = {0, 0, 0, 0};
   llvm::MachO::linkedit_data_command exports_trie_load_command = {0, 0, 0, 0};
   llvm::MachO::dyld_info_command dyld_info = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
   llvm::MachO::dysymtab_command dysymtab = m_dysymtab;
+  SymtabCommandLargeOffsets symtab_load_command;
   // The data element of type bool indicates that this entry is thumb
   // code.
   typedef AddressDataArray<lldb::addr_t, bool, 100> FunctionStarts;
 
-  // The virtual address offset from TEXT to the symbol/string tables
-  // in the LINKEDIT section.  The LC_SYMTAB symtab_command `symoff` and
-  // `stroff` are uint32_t's that give the file offset in the binary.
-  // If the binary is laid down in memory with all segments consecutive,
-  // then these are the offsets from the mach-o header aka TEXT segment
-  // to the tables' virtual addresses.
-  // But if the binary is loaded in virtual address space with different
-  // slides for the segments (e.g. a shared cache), the LINKEDIT may be
-  // more than 4GB away from TEXT, and a 32-bit offset is not sufficient.
-  offset_t symbol_table_offset_from_TEXT = 0;
-  offset_t string_table_offset_from_TEXT = 0;
-
   // Record the address of every function/data that we add to the symtab.
   // We add symbols to the table in the order of most information (nlist
   // records) to least (function starts), and avoid duplicating symbols
@@ -2290,12 +2278,10 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
     case LC_SYMTAB:
       symtab_load_command.cmd = lc.cmd;
       symtab_load_command.cmdsize = lc.cmdsize;
-      // Read in the rest of the symtab load command
-      if (m_data.GetU32(&offset, &symtab_load_command.symoff, 4) ==
-          nullptr) // fill in symoff, nsyms, stroff, strsize fields
-        return;
-      string_table_offset_from_TEXT = symtab_load_command.stroff;
-      symbol_table_offset_from_TEXT = symtab_load_command.symoff;
+      symtab_load_command.symoff = m_data.GetU32(&offset);
+      symtab_load_command.nsyms = m_data.GetU32(&offset);
+      symtab_load_command.stroff = m_data.GetU32(&offset);
+      symtab_load_command.strsize = m_data.GetU32(&offset);
       break;
 
     case LC_DYLD_INFO:
@@ -2417,9 +2403,9 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
 
       const addr_t linkedit_file_offset = linkedit_section_sp->GetFileOffset();
       const addr_t symoff_addr = linkedit_load_addr +
-                                 symbol_table_offset_from_TEXT -
+                                 symtab_load_command.symoff -
                                  linkedit_file_offset;
-      strtab_addr = linkedit_load_addr + string_table_offset_from_TEXT -
+      strtab_addr = linkedit_load_addr + symtab_load_command.stroff -
                     linkedit_file_offset;
 
       // Always load dyld - the dynamic linker - from memory if we didn't
@@ -2487,17 +2473,17 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
       lldb::addr_t linkedit_offset = linkedit_section_sp->GetFileOffset();
       lldb::offset_t linkedit_slide =
           linkedit_offset - m_linkedit_original_offset;
-      symbol_table_offset_from_TEXT += linkedit_slide;
-      string_table_offset_from_TEXT += linkedit_slide;
+      symtab_load_command.symoff += linkedit_slide;
+      symtab_load_command.stroff += linkedit_slide;
       dyld_info.export_off += linkedit_slide;
       dysymtab.indirectsymoff += linkedit_slide;
       function_starts_load_command.dataoff += linkedit_slide;
       exports_trie_load_command.dataoff += linkedit_slide;
     }
 
-    nlist_data.SetData(m_data, symbol_table_offset_from_TEXT,
+    nlist_data.SetData(m_data, symtab_load_command.symoff,
                        nlist_data_byte_size);
-    strtab_data.SetData(m_data, string_table_offset_from_TEXT,
+    strtab_data.SetData(m_data, symtab_load_command.stroff,
                         strtab_data_byte_size);
 
     // We shouldn't have exports data from both the LC_DYLD_INFO command
diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.h b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.h
index be87112df7d898..27b2078b5a3fcd 100644
--- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.h
+++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.h
@@ -256,6 +256,19 @@ class ObjectFileMachO : public lldb_private::ObjectFile {
     bool IsValid() { return all_image_infos.size() > 0; }
   };
 
+  // The LC_SYMTAB's symtab_command structure uses 32-bit file offsets
+  // for two fields, but ObjectFileMachO needs to calculate the offsets
+  // in virtual address layout from the start of the TEXT segment, and
+  // that span may be larger than 4GB.
+  struct SymtabCommandLargeOffsets {
+    uint32_t cmd = 0;          /* LC_SYMTAB */
+    uint32_t cmdsize = 0;      /* sizeof(struct symtab_command) */
+    lldb::offset_t symoff = 0; /* symbol table offset */
+    uint32_t nsyms = 0;        /* number of symbol table entries */
+    lldb::offset_t stroff = 0; /* string table offset */
+    uint32_t strsize = 0;      /* string table size in bytes */
+  };
+
   /// Get the list of binary images that were present in the process
   /// when the corefile was produced.
   /// \return

>From 8e7dd931f9fa26eb4282e3f9acaebd975cde55a0 Mon Sep 17 00:00:00 2001
From: Jason Molenda <jmolenda at apple.com>
Date: Wed, 27 Nov 2024 13:42:44 -0800
Subject: [PATCH 3/3] Add comment with symtab_command decl to clarify reads

---
 lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
index da87491fa32af4..4aa85a99edf01e 100644
--- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
+++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
@@ -2276,6 +2276,14 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
     // Watch for the symbol table load command
     switch (lc.cmd) {
     case LC_SYMTAB:
+      // struct symtab_command {
+      //   uint32_t        cmd;            /* LC_SYMTAB */
+      //   uint32_t        cmdsize;        /* sizeof(struct symtab_command) */
+      //   uint32_t        symoff;         /* symbol table offset */
+      //   uint32_t        nsyms;          /* number of symbol table entries */
+      //   uint32_t        stroff;         /* string table offset */
+      //   uint32_t        strsize;        /* string table size in bytes */
+      // };
       symtab_load_command.cmd = lc.cmd;
       symtab_load_command.cmdsize = lc.cmdsize;
       symtab_load_command.symoff = m_data.GetU32(&offset);



More information about the lldb-commits mailing list