[Lldb-commits] [lldb] b0572ab - Improve performance when parsing symbol tables in mach-o files.

Greg Clayton via lldb-commits lldb-commits at lists.llvm.org
Wed Jun 2 10:31:43 PDT 2021


Author: Greg Clayton
Date: 2021-06-02T10:31:37-07:00
New Revision: b0572abf72fd4aafbb56bc41350e41bdfd96cdde

URL: https://github.com/llvm/llvm-project/commit/b0572abf72fd4aafbb56bc41350e41bdfd96cdde
DIFF: https://github.com/llvm/llvm-project/commit/b0572abf72fd4aafbb56bc41350e41bdfd96cdde.diff

LOG: Improve performance when parsing symbol tables in mach-o files.

Some larger projects were loading quite slowly with the current LLDB on macOS and macOS simulator builds. I did some instrument traces and found 3 main culprits:
- a LLDB timer that was put into a function that was called too often
- a std::set that was keeping track of the address of symbols that were already added
- a unnamed function generator in ObjectFile that was going slow due to allocations

In order to see this in action I ran the latest LLDB on a large application with many frameworks using the following method:

(lldb) script import time; start_time = time.perf_counter()
(lldb) file Large.app
(lldb) script print(time.perf_counter() - start_time)

I first range "sudo purge" to clear the system file caches to simulate a cold startup of the debugger, followed by two iterations with warm file caches.

Prior to this fix I was seeing the following timings:

17.68 (cold)
14.56 (warm 1)
14.52 (warm 2)

After this fix I was seeing:

11.32 (cold)
8.43 (warm 1)
8.49 (warm 2)

Differential Revision: https://reviews.llvm.org/D103504

Added: 
    

Modified: 
    lldb/source/Core/Module.cpp
    lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
    lldb/source/Symbol/ObjectFile.cpp

Removed: 
    


################################################################################
diff  --git a/lldb/source/Core/Module.cpp b/lldb/source/Core/Module.cpp
index 117b0bfd5aae3..64193b57b095f 100644
--- a/lldb/source/Core/Module.cpp
+++ b/lldb/source/Core/Module.cpp
@@ -440,8 +440,6 @@ CompUnitSP Module::GetCompileUnitAtIndex(size_t index) {
 
 bool Module::ResolveFileAddress(lldb::addr_t vm_addr, Address &so_addr) {
   std::lock_guard<std::recursive_mutex> guard(m_mutex);
-  LLDB_SCOPED_TIMERF("Module::ResolveFileAddress (vm_addr = 0x%" PRIx64 ")",
-                     vm_addr);
   SectionList *section_list = GetSectionList();
   if (section_list)
     return so_addr.ResolveAddressUsingFileSections(vm_addr, section_list);

diff  --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
index f346f5c01c2f4..0285e336df804 100644
--- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
+++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
@@ -44,6 +44,7 @@
 
 #include "lldb/Host/SafeMachO.h"
 
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/MemoryBuffer.h"
 
@@ -1895,9 +1896,9 @@ class MachSymtabSectionInfo {
             filename = first_section_sp->GetObjectFile()->GetFileSpec().GetPath();
 
           Host::SystemLog(Host::eSystemLogError,
-                          "error: unable to find section %d for a symbol in %s, corrupt file?\n",
-                          n_sect, 
-                          filename.c_str());
+                          "error: unable to find section %d for a symbol in "
+                          "%s, corrupt file?\n",
+                          n_sect, filename.c_str());
         }
       }
       if (m_section_infos[n_sect].vm_range.Contains(file_addr)) {
@@ -2183,7 +2184,16 @@ size_t ObjectFileMachO::ParseSymtab() {
   // We add symbols to the table in the order of most information (nlist
   // records) to least (function starts), and avoid duplicating symbols
   // via this set.
-  std::set<addr_t> symbols_added;
+  llvm::DenseSet<addr_t> symbols_added;
+
+  // We are using a llvm::DenseSet for "symbols_added" so we must be sure we
+  // do not add the tombstone or empty keys to the set.
+  auto add_symbol_addr = [&symbols_added](lldb::addr_t file_addr) {
+    // Don't add the tombstone or empty keys.
+    if (file_addr == UINT64_MAX || file_addr == UINT64_MAX - 1)
+      return;
+    symbols_added.insert(file_addr);
+  };
   FunctionStarts function_starts;
   lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic);
   uint32_t i;
@@ -3640,9 +3650,9 @@ size_t ObjectFileMachO::ParseSymtab() {
                                     symbol_section);
                                 sym[GSYM_sym_idx].GetAddressRef().SetOffset(
                                     symbol_value);
-                                symbols_added.insert(sym[GSYM_sym_idx]
-                                                         .GetAddress()
-                                                         .GetFileAddress());
+                                add_symbol_addr(sym[GSYM_sym_idx]
+                                                    .GetAddress()
+                                                    .GetFileAddress());
                                 // We just need the flags from the linker
                                 // symbol, so put these flags
                                 // into the N_GSYM flags to avoid duplicate
@@ -3662,7 +3672,7 @@ size_t ObjectFileMachO::ParseSymtab() {
                       if (set_value) {
                         sym[sym_idx].GetAddressRef().SetSection(symbol_section);
                         sym[sym_idx].GetAddressRef().SetOffset(symbol_value);
-                        symbols_added.insert(
+                        add_symbol_addr(
                             sym[sym_idx].GetAddress().GetFileAddress());
                       }
                       sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
@@ -4475,7 +4485,7 @@ size_t ObjectFileMachO::ParseSymtab() {
                 // invalid address of zero when the global is a common symbol.
                 sym[GSYM_sym_idx].GetAddressRef().SetSection(symbol_section);
                 sym[GSYM_sym_idx].GetAddressRef().SetOffset(symbol_value);
-                symbols_added.insert(
+                add_symbol_addr(
                     sym[GSYM_sym_idx].GetAddress().GetFileAddress());
                 // We just need the flags from the linker symbol, so put these
                 // flags into the N_GSYM flags to avoid duplicate symbols in
@@ -4494,7 +4504,8 @@ size_t ObjectFileMachO::ParseSymtab() {
       if (set_value) {
         sym[sym_idx].GetAddressRef().SetSection(symbol_section);
         sym[sym_idx].GetAddressRef().SetOffset(symbol_value);
-        symbols_added.insert(sym[sym_idx].GetAddress().GetFileAddress());
+        if (symbol_section)
+          add_symbol_addr(sym[sym_idx].GetAddress().GetFileAddress());
       }
       sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
       if (nlist.n_desc & N_WEAK_REF)
@@ -4590,7 +4601,7 @@ size_t ObjectFileMachO::ParseSymtab() {
         sym[sym_idx].SetIsSynthetic(true);
         sym[sym_idx].SetExternal(true);
         sym[sym_idx].GetAddressRef() = symbol_addr;
-        symbols_added.insert(symbol_addr.GetFileAddress());
+        add_symbol_addr(symbol_addr.GetFileAddress());
         if (e.entry.flags & TRIE_SYMBOL_IS_THUMB)
           sym[sym_idx].SetFlags(MACHO_NLIST_ARM_SYMBOL_IS_THUMB);
         ++sym_idx;
@@ -4645,7 +4656,7 @@ size_t ObjectFileMachO::ParseSymtab() {
               sym[sym_idx].SetType(eSymbolTypeCode);
               sym[sym_idx].SetIsSynthetic(true);
               sym[sym_idx].GetAddressRef() = symbol_addr;
-              symbols_added.insert(symbol_addr.GetFileAddress());
+              add_symbol_addr(symbol_addr.GetFileAddress());
               if (symbol_flags)
                 sym[sym_idx].SetFlags(symbol_flags);
               if (symbol_byte_size)
@@ -4746,7 +4757,7 @@ size_t ObjectFileMachO::ParseSymtab() {
                     sym[sym_idx].SetType(eSymbolTypeResolver);
                   sym[sym_idx].SetIsSynthetic(true);
                   sym[sym_idx].GetAddressRef() = so_addr;
-                  symbols_added.insert(so_addr.GetFileAddress());
+                  add_symbol_addr(so_addr.GetFileAddress());
                   sym[sym_idx].SetByteSize(symbol_stub_byte_size);
                   ++sym_idx;
                 }

diff  --git a/lldb/source/Symbol/ObjectFile.cpp b/lldb/source/Symbol/ObjectFile.cpp
index f5dcbc5467f72..b0fdd50b3c0f1 100644
--- a/lldb/source/Symbol/ObjectFile.cpp
+++ b/lldb/source/Symbol/ObjectFile.cpp
@@ -617,11 +617,13 @@ ObjectFile::GetSymbolTypeFromName(llvm::StringRef name,
 }
 
 ConstString ObjectFile::GetNextSyntheticSymbolName() {
-  StreamString ss;
+  llvm::SmallString<256> name;
+  llvm::raw_svector_ostream os(name);
   ConstString file_name = GetModule()->GetFileSpec().GetFilename();
-  ss.Printf("___lldb_unnamed_symbol%u$$%s", ++m_synthetic_symbol_idx,
-            file_name.GetCString());
-  return ConstString(ss.GetString());
+  ++m_synthetic_symbol_idx;
+  os << "___lldb_unnamed_symbol" << m_synthetic_symbol_idx << "$$"
+     << file_name.GetStringRef();
+  return ConstString(os.str());
 }
 
 std::vector<ObjectFile::LoadableData>


        


More information about the lldb-commits mailing list