[Lldb-commits] [lldb] b0572ab - Improve performance when parsing symbol tables in mach-o files.
Greg Clayton via lldb-commits
lldb-commits at lists.llvm.org
Wed Jun 2 10:31:43 PDT 2021
Author: Greg Clayton
Date: 2021-06-02T10:31:37-07:00
New Revision: b0572abf72fd4aafbb56bc41350e41bdfd96cdde
URL: https://github.com/llvm/llvm-project/commit/b0572abf72fd4aafbb56bc41350e41bdfd96cdde
DIFF: https://github.com/llvm/llvm-project/commit/b0572abf72fd4aafbb56bc41350e41bdfd96cdde.diff
LOG: Improve performance when parsing symbol tables in mach-o files.
Some larger projects were loading quite slowly with the current LLDB on macOS and macOS simulator builds. I did some instrument traces and found 3 main culprits:
- a LLDB timer that was put into a function that was called too often
- a std::set that was keeping track of the address of symbols that were already added
- a unnamed function generator in ObjectFile that was going slow due to allocations
In order to see this in action I ran the latest LLDB on a large application with many frameworks using the following method:
(lldb) script import time; start_time = time.perf_counter()
(lldb) file Large.app
(lldb) script print(time.perf_counter() - start_time)
I first range "sudo purge" to clear the system file caches to simulate a cold startup of the debugger, followed by two iterations with warm file caches.
Prior to this fix I was seeing the following timings:
17.68 (cold)
14.56 (warm 1)
14.52 (warm 2)
After this fix I was seeing:
11.32 (cold)
8.43 (warm 1)
8.49 (warm 2)
Differential Revision: https://reviews.llvm.org/D103504
Added:
Modified:
lldb/source/Core/Module.cpp
lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
lldb/source/Symbol/ObjectFile.cpp
Removed:
################################################################################
diff --git a/lldb/source/Core/Module.cpp b/lldb/source/Core/Module.cpp
index 117b0bfd5aae3..64193b57b095f 100644
--- a/lldb/source/Core/Module.cpp
+++ b/lldb/source/Core/Module.cpp
@@ -440,8 +440,6 @@ CompUnitSP Module::GetCompileUnitAtIndex(size_t index) {
bool Module::ResolveFileAddress(lldb::addr_t vm_addr, Address &so_addr) {
std::lock_guard<std::recursive_mutex> guard(m_mutex);
- LLDB_SCOPED_TIMERF("Module::ResolveFileAddress (vm_addr = 0x%" PRIx64 ")",
- vm_addr);
SectionList *section_list = GetSectionList();
if (section_list)
return so_addr.ResolveAddressUsingFileSections(vm_addr, section_list);
diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
index f346f5c01c2f4..0285e336df804 100644
--- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
+++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
@@ -44,6 +44,7 @@
#include "lldb/Host/SafeMachO.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -1895,9 +1896,9 @@ class MachSymtabSectionInfo {
filename = first_section_sp->GetObjectFile()->GetFileSpec().GetPath();
Host::SystemLog(Host::eSystemLogError,
- "error: unable to find section %d for a symbol in %s, corrupt file?\n",
- n_sect,
- filename.c_str());
+ "error: unable to find section %d for a symbol in "
+ "%s, corrupt file?\n",
+ n_sect, filename.c_str());
}
}
if (m_section_infos[n_sect].vm_range.Contains(file_addr)) {
@@ -2183,7 +2184,16 @@ size_t ObjectFileMachO::ParseSymtab() {
// We add symbols to the table in the order of most information (nlist
// records) to least (function starts), and avoid duplicating symbols
// via this set.
- std::set<addr_t> symbols_added;
+ llvm::DenseSet<addr_t> symbols_added;
+
+ // We are using a llvm::DenseSet for "symbols_added" so we must be sure we
+ // do not add the tombstone or empty keys to the set.
+ auto add_symbol_addr = [&symbols_added](lldb::addr_t file_addr) {
+ // Don't add the tombstone or empty keys.
+ if (file_addr == UINT64_MAX || file_addr == UINT64_MAX - 1)
+ return;
+ symbols_added.insert(file_addr);
+ };
FunctionStarts function_starts;
lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic);
uint32_t i;
@@ -3640,9 +3650,9 @@ size_t ObjectFileMachO::ParseSymtab() {
symbol_section);
sym[GSYM_sym_idx].GetAddressRef().SetOffset(
symbol_value);
- symbols_added.insert(sym[GSYM_sym_idx]
- .GetAddress()
- .GetFileAddress());
+ add_symbol_addr(sym[GSYM_sym_idx]
+ .GetAddress()
+ .GetFileAddress());
// We just need the flags from the linker
// symbol, so put these flags
// into the N_GSYM flags to avoid duplicate
@@ -3662,7 +3672,7 @@ size_t ObjectFileMachO::ParseSymtab() {
if (set_value) {
sym[sym_idx].GetAddressRef().SetSection(symbol_section);
sym[sym_idx].GetAddressRef().SetOffset(symbol_value);
- symbols_added.insert(
+ add_symbol_addr(
sym[sym_idx].GetAddress().GetFileAddress());
}
sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
@@ -4475,7 +4485,7 @@ size_t ObjectFileMachO::ParseSymtab() {
// invalid address of zero when the global is a common symbol.
sym[GSYM_sym_idx].GetAddressRef().SetSection(symbol_section);
sym[GSYM_sym_idx].GetAddressRef().SetOffset(symbol_value);
- symbols_added.insert(
+ add_symbol_addr(
sym[GSYM_sym_idx].GetAddress().GetFileAddress());
// We just need the flags from the linker symbol, so put these
// flags into the N_GSYM flags to avoid duplicate symbols in
@@ -4494,7 +4504,8 @@ size_t ObjectFileMachO::ParseSymtab() {
if (set_value) {
sym[sym_idx].GetAddressRef().SetSection(symbol_section);
sym[sym_idx].GetAddressRef().SetOffset(symbol_value);
- symbols_added.insert(sym[sym_idx].GetAddress().GetFileAddress());
+ if (symbol_section)
+ add_symbol_addr(sym[sym_idx].GetAddress().GetFileAddress());
}
sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
if (nlist.n_desc & N_WEAK_REF)
@@ -4590,7 +4601,7 @@ size_t ObjectFileMachO::ParseSymtab() {
sym[sym_idx].SetIsSynthetic(true);
sym[sym_idx].SetExternal(true);
sym[sym_idx].GetAddressRef() = symbol_addr;
- symbols_added.insert(symbol_addr.GetFileAddress());
+ add_symbol_addr(symbol_addr.GetFileAddress());
if (e.entry.flags & TRIE_SYMBOL_IS_THUMB)
sym[sym_idx].SetFlags(MACHO_NLIST_ARM_SYMBOL_IS_THUMB);
++sym_idx;
@@ -4645,7 +4656,7 @@ size_t ObjectFileMachO::ParseSymtab() {
sym[sym_idx].SetType(eSymbolTypeCode);
sym[sym_idx].SetIsSynthetic(true);
sym[sym_idx].GetAddressRef() = symbol_addr;
- symbols_added.insert(symbol_addr.GetFileAddress());
+ add_symbol_addr(symbol_addr.GetFileAddress());
if (symbol_flags)
sym[sym_idx].SetFlags(symbol_flags);
if (symbol_byte_size)
@@ -4746,7 +4757,7 @@ size_t ObjectFileMachO::ParseSymtab() {
sym[sym_idx].SetType(eSymbolTypeResolver);
sym[sym_idx].SetIsSynthetic(true);
sym[sym_idx].GetAddressRef() = so_addr;
- symbols_added.insert(so_addr.GetFileAddress());
+ add_symbol_addr(so_addr.GetFileAddress());
sym[sym_idx].SetByteSize(symbol_stub_byte_size);
++sym_idx;
}
diff --git a/lldb/source/Symbol/ObjectFile.cpp b/lldb/source/Symbol/ObjectFile.cpp
index f5dcbc5467f72..b0fdd50b3c0f1 100644
--- a/lldb/source/Symbol/ObjectFile.cpp
+++ b/lldb/source/Symbol/ObjectFile.cpp
@@ -617,11 +617,13 @@ ObjectFile::GetSymbolTypeFromName(llvm::StringRef name,
}
ConstString ObjectFile::GetNextSyntheticSymbolName() {
- StreamString ss;
+ llvm::SmallString<256> name;
+ llvm::raw_svector_ostream os(name);
ConstString file_name = GetModule()->GetFileSpec().GetFilename();
- ss.Printf("___lldb_unnamed_symbol%u$$%s", ++m_synthetic_symbol_idx,
- file_name.GetCString());
- return ConstString(ss.GetString());
+ ++m_synthetic_symbol_idx;
+ os << "___lldb_unnamed_symbol" << m_synthetic_symbol_idx << "$$"
+ << file_name.GetStringRef();
+ return ConstString(os.str());
}
std::vector<ObjectFile::LoadableData>
More information about the lldb-commits
mailing list