[Lldb-commits] [lldb] d30797b - [lldb] Minidump: check for .text hash match with directory

Joseph Tremoulet via lldb-commits lldb-commits at lists.llvm.org
Fri Oct 16 06:32:57 PDT 2020


Author: Joseph Tremoulet
Date: 2020-10-16T09:32:08-04:00
New Revision: d30797b4041ffe215b92d376af60c4f26a0555ae

URL: https://github.com/llvm/llvm-project/commit/d30797b4041ffe215b92d376af60c4f26a0555ae
DIFF: https://github.com/llvm/llvm-project/commit/d30797b4041ffe215b92d376af60c4f26a0555ae.diff

LOG: [lldb] Minidump: check for .text hash match with directory

When opening a minidump, we might discover that it reports a UUID for a
module that doesn't match the build ID, but rather a hash of the .text
section (according to either of two different hash functions, used by
breakpad and Facebook respectively).  The current logic searches for a
module by filename only to check the hash; this change updates it to
first search by directory+filename.  This is important when the
directory specified in the minidump must be interpreted relative to a
user-provided sysoort, as the leaf directory won't be in the search path
in that case.

Also add a regression test; without this change, module validation fails
because we have just the placeholder module which reports as its path
the platform path in the minidump.

Reviewed By: clayborg

Differential Revision: https://reviews.llvm.org/D89155

Added: 
    lldb/test/API/functionalities/postmortem/minidump-new/libbreakpad-decoy.yaml

Modified: 
    lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp
    lldb/source/Plugins/Process/minidump/ProcessMinidump.h
    lldb/test/API/functionalities/postmortem/minidump-new/TestMiniDumpUUID.py

Removed: 
    


################################################################################
diff  --git a/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp b/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp
index 17fdfdb4f345..26c56e38819f 100644
--- a/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp
+++ b/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp
@@ -484,6 +484,53 @@ bool ProcessMinidump::UpdateThreadList(ThreadList &old_thread_list,
   return new_thread_list.GetSize(false) > 0;
 }
 
+ModuleSP ProcessMinidump::GetOrCreateModule(UUID minidump_uuid,
+                                            llvm::StringRef name,
+                                            ModuleSpec module_spec) {
+  Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_DYNAMIC_LOADER));
+  Status error;
+
+  ModuleSP module_sp =
+      GetTarget().GetOrCreateModule(module_spec, true /* notify */, &error);
+  if (!module_sp)
+    return module_sp;
+  // We consider the module to be a match if the minidump UUID is a
+  // prefix of the actual UUID, or if either of the UUIDs are empty.
+  const auto dmp_bytes = minidump_uuid.GetBytes();
+  const auto mod_bytes = module_sp->GetUUID().GetBytes();
+  const bool match = dmp_bytes.empty() || mod_bytes.empty() ||
+                     mod_bytes.take_front(dmp_bytes.size()) == dmp_bytes;
+  if (match) {
+    LLDB_LOG(log, "Partial uuid match for {0}.", name);
+    return module_sp;
+  }
+
+  // Breakpad generates minindump files, and if there is no GNU build
+  // ID in the binary, it will calculate a UUID by hashing first 4096
+  // bytes of the .text section and using that as the UUID for a module
+  // in the minidump. Facebook uses a modified breakpad client that
+  // uses a slightly modified this hash to avoid collisions. Check for
+  // UUIDs from the minindump that match these cases and accept the
+  // module we find if they do match.
+  std::vector<uint8_t> breakpad_uuid;
+  std::vector<uint8_t> facebook_uuid;
+  HashElfTextSection(module_sp, breakpad_uuid, facebook_uuid);
+  if (dmp_bytes == llvm::ArrayRef<uint8_t>(breakpad_uuid)) {
+    LLDB_LOG(log, "Breakpad .text hash match for {0}.", name);
+    return module_sp;
+  }
+  if (dmp_bytes == llvm::ArrayRef<uint8_t>(facebook_uuid)) {
+    LLDB_LOG(log, "Facebook .text hash match for {0}.", name);
+    return module_sp;
+  }
+  // The UUID wasn't a partial match and didn't match the .text hash
+  // so remove the module from the target, we will need to create a
+  // placeholder object file.
+  GetTarget().GetImages().Remove(module_sp);
+  module_sp.reset();
+  return module_sp;
+}
+
 void ProcessMinidump::ReadModuleList() {
   std::vector<const minidump::Module *> filtered_modules =
       m_minidump_parser->GetFilteredModuleList();
@@ -513,54 +560,22 @@ void ProcessMinidump::ReadModuleList() {
     // add the module to the target if it finds one.
     lldb::ModuleSP module_sp = GetTarget().GetOrCreateModule(module_spec,
                                                      true /* notify */, &error);
-    if (!module_sp) {
-      // Try and find a module without specifying the UUID and only looking for
-      // the file given a basename. We then will look for a partial UUID match
-      // if we find any matches. This function will add the module to the
-      // target if it finds one, so we need to remove the module from the target
-      // if the UUID doesn't match during our manual UUID verification. This
-      // allows the "target.exec-search-paths" setting to specify one or more
-      // directories that contain executables that can be searched for matches.
-      ModuleSpec basename_module_spec(module_spec);
-      basename_module_spec.GetUUID().Clear();
-      basename_module_spec.GetFileSpec().GetDirectory().Clear();
-      module_sp = GetTarget().GetOrCreateModule(basename_module_spec,
-                                                true /* notify */, &error);
-      if (module_sp) {
-        // We consider the module to be a match if the minidump UUID is a
-        // prefix of the actual UUID, or if either of the UUIDs are empty.
-        const auto dmp_bytes = uuid.GetBytes();
-        const auto mod_bytes = module_sp->GetUUID().GetBytes();
-        const bool match = dmp_bytes.empty() || mod_bytes.empty() ||
-            mod_bytes.take_front(dmp_bytes.size()) == dmp_bytes;
-        if (!match) {
-          // Breakpad generates minindump files, and if there is no GNU build
-          // ID in the binary, it will calculate a UUID by hashing first 4096
-          // bytes of the .text section and using that as the UUID for a module
-          // in the minidump. Facebook uses a modified breakpad client that
-          // uses a slightly modified this hash to avoid collisions. Check for
-          // UUIDs from the minindump that match these cases and accept the
-          // module we find if they do match.
-          std::vector<uint8_t> breakpad_uuid;
-          std::vector<uint8_t> facebook_uuid;
-          HashElfTextSection(module_sp, breakpad_uuid, facebook_uuid);
-          if (dmp_bytes == llvm::ArrayRef<uint8_t>(breakpad_uuid)) {
-            LLDB_LOG(log, "Breakpad .text hash match for {0}.", name);
-          } else if (dmp_bytes == llvm::ArrayRef<uint8_t>(facebook_uuid)) {
-            LLDB_LOG(log, "Facebook .text hash match for {0}.", name);
-          } else {
-            // The UUID wasn't a partial match and didn't match the .text hash
-            // so remove the module from the target, we will need to create a
-            // placeholder object file.
-            GetTarget().GetImages().Remove(module_sp);
-            module_sp.reset();
-          }
-        } else {
-          LLDB_LOG(log, "Partial uuid match for {0}.", name);
-        }
-      }
-    } else {
+    if (module_sp) {
       LLDB_LOG(log, "Full uuid match for {0}.", name);
+    } else {
+      // We couldn't find a module with an exactly-matching UUID.  Sometimes
+      // a minidump UUID is only a partial match or is a hash.  So try again
+      // without specifying the UUID, then again without specifying the
+      // directory if that fails.  This will allow us to find modules with
+      // partial matches or hash UUIDs in user-provided sysroots or search
+      // directories (target.exec-search-paths).
+      ModuleSpec partial_module_spec = module_spec;
+      partial_module_spec.GetUUID().Clear();
+      module_sp = GetOrCreateModule(uuid, name, partial_module_spec);
+      if (!module_sp) {
+        partial_module_spec.GetFileSpec().GetDirectory().Clear();
+        module_sp = GetOrCreateModule(uuid, name, partial_module_spec);
+      }
     }
     if (module_sp) {
       // Watch out for place holder modules that have 
diff erent paths, but the

diff  --git a/lldb/source/Plugins/Process/minidump/ProcessMinidump.h b/lldb/source/Plugins/Process/minidump/ProcessMinidump.h
index 839b0e7563f7..bfdace7ea33e 100644
--- a/lldb/source/Plugins/Process/minidump/ProcessMinidump.h
+++ b/lldb/source/Plugins/Process/minidump/ProcessMinidump.h
@@ -102,6 +102,10 @@ class ProcessMinidump : public Process {
 
   void ReadModuleList();
 
+  lldb::ModuleSP GetOrCreateModule(lldb_private::UUID minidump_uuid,
+                                   llvm::StringRef name,
+                                   lldb_private::ModuleSpec module_spec);
+
   JITLoaderList &GetJITLoaders() override;
 
 private:

diff  --git a/lldb/test/API/functionalities/postmortem/minidump-new/TestMiniDumpUUID.py b/lldb/test/API/functionalities/postmortem/minidump-new/TestMiniDumpUUID.py
index 1dc2c81485d0..abd62500e7d6 100644
--- a/lldb/test/API/functionalities/postmortem/minidump-new/TestMiniDumpUUID.py
+++ b/lldb/test/API/functionalities/postmortem/minidump-new/TestMiniDumpUUID.py
@@ -22,11 +22,14 @@ class MiniDumpUUIDTestCase(TestBase):
     def verify_module(self, module, verify_path, verify_uuid):
         # Compare the filename and the directory separately. We are avoiding
         # SBFileSpec.fullpath because it causes a slash/backslash confusion
-        # on Windows.
+        # on Windows.  Similarly, we compare the directories using normcase
+        # because they may contain a Linux-style relative path from the
+        # minidump appended to a Windows-style root path from the host.
         self.assertEqual(
             os.path.basename(verify_path), module.GetFileSpec().basename)
         self.assertEqual(
-            os.path.dirname(verify_path), module.GetFileSpec().dirname or "")
+            os.path.normcase(os.path.dirname(verify_path)),
+            os.path.normcase(module.GetFileSpec().dirname or ""))
         self.assertEqual(verify_uuid, module.GetUUIDString())
 
     def get_minidump_modules(self, yaml_file):
@@ -201,6 +204,50 @@ def test_breakpad_hash_match(self):
         # will check that this matches.
         self.verify_module(modules[0], so_path, "D9C480E8")
 
+    def test_breakpad_hash_match_sysroot(self):
+        """
+            Check that we can match the breakpad .text section hash when the
+            module is located under a user-provided sysroot.
+        """
+        sysroot_path = os.path.join(self.getBuildDir(), "mock_sysroot")
+        # Create the directory under the sysroot where the minidump reports
+        # the module.
+        so_dir = os.path.join(sysroot_path, "invalid", "path", "on", "current", "system")
+        so_path = os.path.join(so_dir, "libbreakpad.so")
+        lldbutil.mkdir_p(so_dir)
+        self.yaml2obj("libbreakpad.yaml", so_path)
+        self.runCmd("platform select remote-linux --sysroot '%s'" % sysroot_path)
+        modules = self.get_minidump_modules("linux-arm-breakpad-uuid-match.yaml")
+        self.assertEqual(1, len(modules))
+        # LLDB makes up its own UUID as well when there is no build ID so we
+        # will check that this matches.
+        self.verify_module(modules[0], so_path, "D9C480E8")
+
+    def test_breakpad_hash_match_sysroot_decoy(self):
+        """
+            Check that we can match the breakpad .text section hash when there is
+            a module with the right name but wrong contents under a user-provided
+            sysroot, and the right module is at the given search path..
+        """
+        sysroot_path = os.path.join(self.getBuildDir(), "mock_sysroot")
+        # Create the directory under the sysroot where the minidump reports
+        # the module.
+        decoy_dir = os.path.join(sysroot_path, "invalid", "path", "on", "current", "system")
+        decoy_path = os.path.join(decoy_dir, "libbreakpad.so")
+        lldbutil.mkdir_p(decoy_dir)
+        self.yaml2obj("libbreakpad-decoy.yaml", decoy_path)
+        self.runCmd("platform select remote-linux --sysroot '%s'" % sysroot_path)
+        so_dir = os.path.join(self.getBuildDir(), "searchpath_dir")
+        so_path = os.path.join(so_dir, "libbreakpad.so")
+        lldbutil.mkdir_p(so_dir)
+        self.yaml2obj("libbreakpad.yaml", so_path)
+        self.runCmd('settings set target.exec-search-paths "%s"' % so_dir)
+        modules = self.get_minidump_modules("linux-arm-breakpad-uuid-match.yaml")
+        self.assertEqual(1, len(modules))
+        # LLDB makes up its own UUID as well when there is no build ID so we
+        # will check that this matches.
+        self.verify_module(modules[0], so_path, "D9C480E8")
+
     def test_breakpad_overflow_hash_match(self):
         """
             This is a similar to test_breakpad_hash_match, but it verifies that

diff  --git a/lldb/test/API/functionalities/postmortem/minidump-new/libbreakpad-decoy.yaml b/lldb/test/API/functionalities/postmortem/minidump-new/libbreakpad-decoy.yaml
new file mode 100644
index 000000000000..028a12f54a09
--- /dev/null
+++ b/lldb/test/API/functionalities/postmortem/minidump-new/libbreakpad-decoy.yaml
@@ -0,0 +1,18 @@
+# This has 
diff erent .text contents than libbreakpad-yaml,
+# to simulate having 
diff erent versions of the module (to
+# test that we pick the one matching the minidump UUID).
+--- !ELF
+FileHeader:
+  Class:           ELFCLASS32
+  Data:            ELFDATA2LSB
+  Type:            ET_DYN
+  Machine:         EM_ARM
+  Flags:           [ EF_ARM_SOFT_FLOAT, EF_ARM_EABI_VER5 ]
+Sections:
+Sections:
+  - Name:            .text
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x0000000000010000
+    AddressAlign:    0x0000000000000004
+    Content:         040000001400000003000000474E5500CC


        


More information about the lldb-commits mailing list