[Lldb-commits] [lldb] ac49e90 - jGetLoadedDynamicLibrariesInfos can inspect machos not yet loaded

Jason Molenda via lldb-commits lldb-commits at lists.llvm.org
Thu Jul 14 00:56:21 PDT 2022


Author: Jason Molenda
Date: 2022-07-14T00:56:14-07:00
New Revision: ac49e9021919d2a356dd10b39888f168736a43b0

URL: https://github.com/llvm/llvm-project/commit/ac49e9021919d2a356dd10b39888f168736a43b0
DIFF: https://github.com/llvm/llvm-project/commit/ac49e9021919d2a356dd10b39888f168736a43b0.diff

LOG: jGetLoadedDynamicLibrariesInfos can inspect machos not yet loaded

jGetLoadedDynamicLibrariesInfos normally checks with dyld to find
the list of binaries loaded in the inferior, and getting the filepath,
before trying to parse the Mach-O binary in inferior memory.
This allows for debugserver to parse a Mach-O binary present in memory,
but not yet registered with dyld.  This patch also adds some simple
sanity checks that we're reading a Mach-O header before we begin
stepping through load commands, because we won't have the sanity check
of consulting dyld for the list of loaded binaries before parsing.
Also adds a testcase.

[This patch was reverted after causing a testsuite failure on a CI bot;
I haven't been able to repro the failure outside the CI, but I have a
theory that my sanity check on cputype which only matched arm64 and
x86_64 - and the CI machine may have a watch simulator that is still
using i386.]

Differential Revision: https://reviews.llvm.org/D128956
rdar://95737734

Added: 
    lldb/test/API/macosx/unregistered-macho/Makefile
    lldb/test/API/macosx/unregistered-macho/TestUnregisteredMacho.py
    lldb/test/API/macosx/unregistered-macho/main.c

Modified: 
    lldb/tools/debugserver/source/MacOSX/MachProcess.h
    lldb/tools/debugserver/source/MacOSX/MachProcess.mm

Removed: 
    


################################################################################
diff  --git a/lldb/test/API/macosx/unregistered-macho/Makefile b/lldb/test/API/macosx/unregistered-macho/Makefile
new file mode 100644
index 0000000000000..58c3a468659e3
--- /dev/null
+++ b/lldb/test/API/macosx/unregistered-macho/Makefile
@@ -0,0 +1,3 @@
+C_SOURCES = main.c
+
+include Makefile.rules

diff  --git a/lldb/test/API/macosx/unregistered-macho/TestUnregisteredMacho.py b/lldb/test/API/macosx/unregistered-macho/TestUnregisteredMacho.py
new file mode 100644
index 0000000000000..0c3bb50aec89a
--- /dev/null
+++ b/lldb/test/API/macosx/unregistered-macho/TestUnregisteredMacho.py
@@ -0,0 +1,47 @@
+"""Test that debugserver will parse a mach-o in inferior memory even if it's not loaded."""
+
+import os
+import re
+import subprocess
+
+import lldb
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+class TestUnregisteredMacho(TestBase):
+
+    # newer debugserver required for jGetLoadedDynamicLibrariesInfos 
+    # to support this
+    @skipIfOutOfTreeDebugserver  
+    @no_debug_info_test
+    @skipUnlessDarwin
+    def test(self):
+        self.build()
+        target, process, thread, bkpt = lldbutil.run_to_source_breakpoint(
+            self, "// break here", lldb.SBFileSpec("main.c"))
+
+        frame = thread.GetFrameAtIndex(0)
+        macho_buf = frame.GetValueForVariablePath("macho_buf")
+        macho_addr = macho_buf.GetValueAsUnsigned()
+        invalid_macho_addr = macho_buf.GetValueAsUnsigned() + 4
+        gdb_packet = "process plugin packet send 'jGetLoadedDynamicLibrariesInfos:{\"solib_addresses\":[%d]}]'" % macho_addr
+
+        # Send the jGetLoadedDynamicLibrariesInfos packet
+        # to debugserver, asking it to parse the mach-o binary
+        # at this address and give us the UUID etc, even though
+        # dyld doesn't think there is a binary at that address.
+        # We won't get a pathname for the binary (from dyld), but
+        # we will get to the LC_UUID and include that.
+        self.expect (gdb_packet, substrs=['"pathname":""', '"uuid":"1B4E28BA-2FA1-11D2-883F-B9A761BDE3FB"'])
+
+        no_macho_gdb_packet = "process plugin packet send 'jGetLoadedDynamicLibrariesInfos:{\"solib_addresses\":[%d]}]'" % invalid_macho_addr
+        self.expect (no_macho_gdb_packet, substrs=['response: {"images":[]}'])
+
+        # Test that we get back the information for the properly
+        # formatted Mach-O binary in memory, but do not get an
+        # entry for the invalid Mach-O address.
+        both_gdb_packet = "process plugin packet send 'jGetLoadedDynamicLibrariesInfos:{\"solib_addresses\":[%d,%d]}]'" % (macho_addr, invalid_macho_addr)
+        self.expect (both_gdb_packet, substrs=['"load_address":%d,' % macho_addr])
+        self.expect (both_gdb_packet, substrs=['"load_address":%d,' % invalid_macho_addr], matching=False)
+

diff  --git a/lldb/test/API/macosx/unregistered-macho/main.c b/lldb/test/API/macosx/unregistered-macho/main.c
new file mode 100644
index 0000000000000..5de4b5f642467
--- /dev/null
+++ b/lldb/test/API/macosx/unregistered-macho/main.c
@@ -0,0 +1,63 @@
+#include <mach-o/loader.h>
+#include <mach/machine.h>
+#include <stdlib.h>
+#include <string.h>
+#include <uuid/uuid.h>
+
+int main() {
+  int size_of_load_cmds =
+      sizeof(struct segment_command_64) + sizeof(struct uuid_command);
+  uint8_t *macho_buf =
+      (uint8_t *)malloc(sizeof(struct mach_header_64) + size_of_load_cmds);
+  uint8_t *p = macho_buf;
+  struct mach_header_64 mh;
+  mh.magic = MH_MAGIC_64;
+  mh.cputype = CPU_TYPE_ARM64;
+  mh.cpusubtype = 0;
+  mh.filetype = MH_EXECUTE;
+  mh.ncmds = 2;
+  mh.sizeofcmds = size_of_load_cmds;
+  mh.flags = MH_NOUNDEFS | MH_DYLDLINK | MH_TWOLEVEL | MH_PIE;
+
+  memcpy(p, &mh, sizeof(mh));
+  p += sizeof(mh);
+
+  struct segment_command_64 seg;
+  seg.cmd = LC_SEGMENT_64;
+  seg.cmdsize = sizeof(seg);
+  strcpy(seg.segname, "__TEXT");
+  seg.vmaddr = 0x5000;
+  seg.vmsize = 0x1000;
+  seg.fileoff = 0;
+  seg.filesize = 0;
+  seg.maxprot = 0;
+  seg.initprot = 0;
+  seg.nsects = 0;
+  seg.flags = 0;
+
+  memcpy(p, &seg, sizeof(seg));
+  p += sizeof(seg);
+
+  struct uuid_command uuid;
+  uuid.cmd = LC_UUID;
+  uuid.cmdsize = sizeof(uuid);
+  uuid_clear(uuid.uuid);
+  uuid_parse("1b4e28ba-2fa1-11d2-883f-b9a761bde3fb", uuid.uuid);
+
+  memcpy(p, &uuid, sizeof(uuid));
+  p += sizeof(uuid);
+
+  // If this needs to be debugged, the memory buffer can be written
+  // to a file with
+  // (lldb) mem rea -b -o /tmp/t -c `p - macho_buf` macho_buf
+  // (lldb) platform shell otool -hlv /tmp/t
+  // to verify that it is well formed.
+
+  // And inside lldb, it should be inspectable via
+  // (lldb) script print(lldb.frame.locals["macho_buf"][0].GetValueAsUnsigned())
+  // 105553162403968
+  // (lldb) process plugin packet send
+  // 'jGetLoadedDynamicLibrariesInfos:{"solib_addresses":[105553162403968]}]'
+
+  return 0; // break here
+}

diff  --git a/lldb/tools/debugserver/source/MacOSX/MachProcess.h b/lldb/tools/debugserver/source/MacOSX/MachProcess.h
index 3b250bf5ec901..97efb5985ab4a 100644
--- a/lldb/tools/debugserver/source/MacOSX/MachProcess.h
+++ b/lldb/tools/debugserver/source/MacOSX/MachProcess.h
@@ -73,9 +73,11 @@ class MachProcess {
     uint64_t load_address;
     uint64_t mod_date; // may not be available - 0 if so
     struct mach_o_information macho_info;
+    bool is_valid_mach_header;
 
     binary_image_information()
-        : filename(), load_address(INVALID_NUB_ADDRESS), mod_date(0) {}
+        : filename(), load_address(INVALID_NUB_ADDRESS), mod_date(0),
+          is_valid_mach_header(false) {}
   };
 
   // Child process control

diff  --git a/lldb/tools/debugserver/source/MacOSX/MachProcess.mm b/lldb/tools/debugserver/source/MacOSX/MachProcess.mm
index b41cb22bb202d..c12e07040a604 100644
--- a/lldb/tools/debugserver/source/MacOSX/MachProcess.mm
+++ b/lldb/tools/debugserver/source/MacOSX/MachProcess.mm
@@ -746,6 +746,17 @@ static bool FBSAddEventDataToOptions(NSMutableDictionary *options,
   return nullptr;
 }
 
+static bool mach_header_validity_test(uint32_t magic, uint32_t cputype) {
+  if (magic != MH_MAGIC && magic != MH_CIGAM && magic != MH_MAGIC_64 &&
+      magic != MH_CIGAM_64)
+    return false;
+  if (cputype != CPU_TYPE_I386 && cputype != CPU_TYPE_X86_64 &&
+      cputype != CPU_TYPE_ARM && cputype != CPU_TYPE_ARM64 &&
+      cputype != CPU_TYPE_ARM64_32)
+    return false;
+  return true;
+}
+
 // Given an address, read the mach-o header and load commands out of memory to
 // fill in
 // the mach_o_information "inf" object.
@@ -757,12 +768,16 @@ static bool FBSAddEventDataToOptions(NSMutableDictionary *options,
     uint32_t dyld_platform, nub_addr_t mach_o_header_addr, int wordsize,
     struct mach_o_information &inf) {
   uint64_t load_cmds_p;
+
   if (wordsize == 4) {
     struct mach_header header;
     if (ReadMemory(mach_o_header_addr, sizeof(struct mach_header), &header) !=
         sizeof(struct mach_header)) {
       return false;
     }
+    if (!mach_header_validity_test(header.magic, header.cputype))
+      return false;
+
     load_cmds_p = mach_o_header_addr + sizeof(struct mach_header);
     inf.mach_header.magic = header.magic;
     inf.mach_header.cputype = header.cputype;
@@ -779,6 +794,8 @@ static bool FBSAddEventDataToOptions(NSMutableDictionary *options,
                    &header) != sizeof(struct mach_header_64)) {
       return false;
     }
+    if (!mach_header_validity_test(header.magic, header.cputype))
+      return false;
     load_cmds_p = mach_o_header_addr + sizeof(struct mach_header_64);
     inf.mach_header.magic = header.magic;
     inf.mach_header.cputype = header.cputype;
@@ -896,6 +913,8 @@ static bool FBSAddEventDataToOptions(NSMutableDictionary *options,
   const size_t image_count = image_infos.size();
 
   for (size_t i = 0; i < image_count; i++) {
+    if (!image_infos[i].is_valid_mach_header)
+      continue;
     JSONGenerator::DictionarySP image_info_dict_sp(
         new JSONGenerator::Dictionary());
     image_info_dict_sp->AddIntegerItem("load_address",
@@ -970,7 +989,6 @@ static bool FBSAddEventDataToOptions(NSMutableDictionary *options,
   }
 
   JSONGenerator::DictionarySP reply_sp(new JSONGenerator::Dictionary());
-  ;
   reply_sp->AddItem("images", image_infos_array_sp);
 
   return reply_sp;
@@ -985,8 +1003,8 @@ static bool FBSAddEventDataToOptions(NSMutableDictionary *options,
 // information.
 JSONGenerator::ObjectSP MachProcess::GetLoadedDynamicLibrariesInfos(
     nub_process_t pid, nub_addr_t image_list_address, nub_addr_t image_count) {
-  JSONGenerator::DictionarySP reply_sp;
 
+  JSONGenerator::ObjectSP empty_reply_sp(new JSONGenerator::Dictionary());
   int pointer_size = GetInferiorAddrSize(pid);
 
   std::vector<struct binary_image_information> image_infos;
@@ -994,91 +1012,89 @@ static bool FBSAddEventDataToOptions(NSMutableDictionary *options,
 
   uint8_t *image_info_buf = (uint8_t *)malloc(image_infos_size);
   if (image_info_buf == NULL) {
-    return reply_sp;
+    return empty_reply_sp;
+  }
+  if (ReadMemory(image_list_address, image_infos_size, image_info_buf) !=
+      image_infos_size) {
+    return empty_reply_sp;
   }
-    if (ReadMemory(image_list_address, image_infos_size, image_info_buf) !=
-        image_infos_size) {
-      return reply_sp;
-    }
 
-    ////  First the image_infos array with (load addr, pathname, mod date)
-    ///tuples
-
-    for (size_t i = 0; i < image_count; i++) {
-      struct binary_image_information info;
-      nub_addr_t pathname_address;
-      if (pointer_size == 4) {
-        uint32_t load_address_32;
-        uint32_t pathname_address_32;
-        uint32_t mod_date_32;
-        ::memcpy(&load_address_32, image_info_buf + (i * 3 * pointer_size), 4);
-        ::memcpy(&pathname_address_32,
-                 image_info_buf + (i * 3 * pointer_size) + pointer_size, 4);
-        ::memcpy(&mod_date_32, image_info_buf + (i * 3 * pointer_size) +
-                                   pointer_size + pointer_size,
-                 4);
-        info.load_address = load_address_32;
-        info.mod_date = mod_date_32;
-        pathname_address = pathname_address_32;
-      } else {
-        uint64_t load_address_64;
-        uint64_t pathname_address_64;
-        uint64_t mod_date_64;
-        ::memcpy(&load_address_64, image_info_buf + (i * 3 * pointer_size), 8);
-        ::memcpy(&pathname_address_64,
-                 image_info_buf + (i * 3 * pointer_size) + pointer_size, 8);
-        ::memcpy(&mod_date_64, image_info_buf + (i * 3 * pointer_size) +
-                                   pointer_size + pointer_size,
-                 8);
-        info.load_address = load_address_64;
-        info.mod_date = mod_date_64;
-        pathname_address = pathname_address_64;
-      }
-      char strbuf[17];
-      info.filename = "";
-      uint64_t pathname_ptr = pathname_address;
-      bool still_reading = true;
-      while (still_reading &&
-             ReadMemory(pathname_ptr, sizeof(strbuf) - 1, strbuf) ==
-                 sizeof(strbuf) - 1) {
-        strbuf[sizeof(strbuf) - 1] = '\0';
-        info.filename += strbuf;
-        pathname_ptr += sizeof(strbuf) - 1;
-        // Stop if we found nul byte indicating the end of the string
-        for (size_t i = 0; i < sizeof(strbuf) - 1; i++) {
-          if (strbuf[i] == '\0') {
-            still_reading = false;
-            break;
-          }
+  /// First the image_infos array with (load addr, pathname, mod date)
+  /// tuples
+
+  for (size_t i = 0; i < image_count; i++) {
+    struct binary_image_information info;
+    nub_addr_t pathname_address;
+    if (pointer_size == 4) {
+      uint32_t load_address_32;
+      uint32_t pathname_address_32;
+      uint32_t mod_date_32;
+      ::memcpy(&load_address_32, image_info_buf + (i * 3 * pointer_size), 4);
+      ::memcpy(&pathname_address_32,
+               image_info_buf + (i * 3 * pointer_size) + pointer_size, 4);
+      ::memcpy(&mod_date_32,
+               image_info_buf + (i * 3 * pointer_size) + pointer_size +
+                   pointer_size,
+               4);
+      info.load_address = load_address_32;
+      info.mod_date = mod_date_32;
+      pathname_address = pathname_address_32;
+    } else {
+      uint64_t load_address_64;
+      uint64_t pathname_address_64;
+      uint64_t mod_date_64;
+      ::memcpy(&load_address_64, image_info_buf + (i * 3 * pointer_size), 8);
+      ::memcpy(&pathname_address_64,
+               image_info_buf + (i * 3 * pointer_size) + pointer_size, 8);
+      ::memcpy(&mod_date_64,
+               image_info_buf + (i * 3 * pointer_size) + pointer_size +
+                   pointer_size,
+               8);
+      info.load_address = load_address_64;
+      info.mod_date = mod_date_64;
+      pathname_address = pathname_address_64;
+    }
+    char strbuf[17];
+    info.filename = "";
+    uint64_t pathname_ptr = pathname_address;
+    bool still_reading = true;
+    while (still_reading && ReadMemory(pathname_ptr, sizeof(strbuf) - 1,
+                                       strbuf) == sizeof(strbuf) - 1) {
+      strbuf[sizeof(strbuf) - 1] = '\0';
+      info.filename += strbuf;
+      pathname_ptr += sizeof(strbuf) - 1;
+      // Stop if we found nul byte indicating the end of the string
+      for (size_t i = 0; i < sizeof(strbuf) - 1; i++) {
+        if (strbuf[i] == '\0') {
+          still_reading = false;
+          break;
         }
       }
-      uuid_clear(info.macho_info.uuid);
-      image_infos.push_back(info);
-    }
-    if (image_infos.size() == 0) {
-      return reply_sp;
     }
+    uuid_clear(info.macho_info.uuid);
+    image_infos.push_back(info);
+  }
+  if (image_infos.size() == 0) {
+    return empty_reply_sp;
+  }
 
-    free(image_info_buf);
+  free(image_info_buf);
 
-    ////  Second, read the mach header / load commands for all the dylibs
+  ///  Second, read the mach header / load commands for all the dylibs
 
-    for (size_t i = 0; i < image_count; i++) {
-      // The SPI to provide platform is not available on older systems.
-      uint32_t platform = 0;
-      if (!GetMachOInformationFromMemory(platform,
-                                         image_infos[i].load_address,
-                                         pointer_size,
-                                         image_infos[i].macho_info)) {
-        return reply_sp;
-      }
+  for (size_t i = 0; i < image_count; i++) {
+    // The SPI to provide platform is not available on older systems.
+    uint32_t platform = 0;
+    if (GetMachOInformationFromMemory(platform, image_infos[i].load_address,
+                                      pointer_size,
+                                      image_infos[i].macho_info)) {
+      image_infos[i].is_valid_mach_header = true;
     }
+  }
 
-    ////  Third, format all of the above in the JSONGenerator object.
-
-    return FormatDynamicLibrariesIntoJSON(image_infos);
+  ///  Third, format all of the above in the JSONGenerator object.
 
-  return reply_sp;
+  return FormatDynamicLibrariesIntoJSON(image_infos);
 }
 
 /// From dyld SPI header dyld_process_info.h
@@ -1141,7 +1157,6 @@ static bool FBSAddEventDataToOptions(NSMutableDictionary *options,
 // macOS 10.12, iOS 10, tvOS 10, watchOS 3 and newer.
 JSONGenerator::ObjectSP
 MachProcess::GetAllLoadedLibrariesInfos(nub_process_t pid) {
-  JSONGenerator::DictionarySP reply_sp;
 
   int pointer_size = GetInferiorAddrSize(pid);
   std::vector<struct binary_image_information> image_infos;
@@ -1149,8 +1164,11 @@ static bool FBSAddEventDataToOptions(NSMutableDictionary *options,
   uint32_t platform = GetPlatform();
   const size_t image_count = image_infos.size();
   for (size_t i = 0; i < image_count; i++) {
-    GetMachOInformationFromMemory(platform, image_infos[i].load_address,
-                                  pointer_size, image_infos[i].macho_info);
+    if (GetMachOInformationFromMemory(platform, image_infos[i].load_address,
+                                      pointer_size,
+                                      image_infos[i].macho_info)) {
+      image_infos[i].is_valid_mach_header = true;
+    }
   }
     return FormatDynamicLibrariesIntoJSON(image_infos);
 }
@@ -1160,10 +1178,11 @@ static bool FBSAddEventDataToOptions(NSMutableDictionary *options,
 // dyld SPIs that exist in macOS 10.12, iOS 10, tvOS 10, watchOS 3 and newer.
 JSONGenerator::ObjectSP MachProcess::GetLibrariesInfoForAddresses(
     nub_process_t pid, std::vector<uint64_t> &macho_addresses) {
-  JSONGenerator::DictionarySP reply_sp;
 
   int pointer_size = GetInferiorAddrSize(pid);
 
+  // Collect the list of all binaries that dyld knows about in
+  // the inferior process.
   std::vector<struct binary_image_information> all_image_infos;
   GetAllLoadedBinariesViaDYLDSPI(all_image_infos);
   uint32_t platform = GetPlatform();
@@ -1171,19 +1190,35 @@ static bool FBSAddEventDataToOptions(NSMutableDictionary *options,
   std::vector<struct binary_image_information> image_infos;
   const size_t macho_addresses_count = macho_addresses.size();
   const size_t all_image_infos_count = all_image_infos.size();
+
   for (size_t i = 0; i < macho_addresses_count; i++) {
+    bool found_matching_entry = false;
     for (size_t j = 0; j < all_image_infos_count; j++) {
       if (all_image_infos[j].load_address == macho_addresses[i]) {
         image_infos.push_back(all_image_infos[j]);
+        found_matching_entry = true;
       }
     }
+    if (!found_matching_entry) {
+      // dyld doesn't think there is a binary at this address,
+      // but maybe there isn't a binary YET - let's look in memory
+      // for a proper mach-o header etc and return what we can.
+      // We will have an empty filename for the binary (because dyld
+      // doesn't know about it yet) but we can read all of the mach-o
+      // load commands from memory directly.
+      struct binary_image_information entry;
+      entry.load_address = macho_addresses[i];
+      image_infos.push_back(entry);
+    }
   }
 
     const size_t image_infos_count = image_infos.size();
     for (size_t i = 0; i < image_infos_count; i++) {
-      GetMachOInformationFromMemory(platform,
-                                    image_infos[i].load_address, pointer_size,
-                                    image_infos[i].macho_info);
+      if (GetMachOInformationFromMemory(platform, image_infos[i].load_address,
+                                        pointer_size,
+                                        image_infos[i].macho_info)) {
+        image_infos[i].is_valid_mach_header = true;
+      }
     }
     return FormatDynamicLibrariesIntoJSON(image_infos);
 }


        


More information about the lldb-commits mailing list