[Lldb-commits] [lldb] 8b544f3 - [lldb] Do not use LC_FUNCTION_STARTS data to determine symbol size as symbols are created (#155282)
via lldb-commits
lldb-commits at lists.llvm.org
Tue Aug 26 13:21:31 PDT 2025
Author: Alex Langford
Date: 2025-08-26T13:21:28-07:00
New Revision: 8b544f3639bb51ebccdd212e2acc1847726d7dca
URL: https://github.com/llvm/llvm-project/commit/8b544f3639bb51ebccdd212e2acc1847726d7dca
DIFF: https://github.com/llvm/llvm-project/commit/8b544f3639bb51ebccdd212e2acc1847726d7dca.diff
LOG: [lldb] Do not use LC_FUNCTION_STARTS data to determine symbol size as symbols are created (#155282)
Note: This is a resubmission of #106791. I had to revert this a year ago
for a failing test that I could not understand. I have time now to try
and get this in again.
Summary:
This improves the performance of ObjectFileMacho::ParseSymtab by
removing eager and expensive work in favor of doing it later in a
less-expensive fashion.
Experiment:
My goal was to understand LLDB's startup time.
First, I produced a Debug build of LLDB (no dSYM) and a
Release+NoAsserts build of LLDB. The Release build debugged the Debug
build as it debugged a small C++ program. I found that
ObjectFileMachO::ParseSymtab accounted for somewhere between 1.2 and 1.3
seconds consistently. After applying this change, I consistently
measured a reduction of approximately 100ms, putting the time closer to
1.1s and 1.2s on average.
Background:
ObjectFileMachO::ParseSymtab will incrementally create symbols by
parsing nlist entries from the symtab section of a MachO binary. As it
does this, it eagerly tries to determine the size of symbols (e.g. how
long a function is) using LC_FUNCTION_STARTS data (or eh_frame if
LC_FUNCTION_STARTS is unavailable). Concretely, this is done by
performing a binary search on the function starts array and calculating
the distance to the next function or the end of the section (whichever
is smaller).
However, this work is unnecessary for 2 reasons:
1. If you have debug symbol entries (i.e. STABs), the size of a function
is usually stored right after the function's entry. Performing this work
right before parsing the next entry is unnecessary work.
2. Calculating symbol sizes for symbols of size 0 is already performed
in `Symtab::InitAddressIndexes` after all the symbols are added to the
Symtab. It also does this more efficiently by walking over a list of
symbols sorted by address, so the work to calculate the size per symbol
is constant instead of O(log n).
Added:
Modified:
lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
Removed:
################################################################################
diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
index d7cb60e3f0c38..924e34053d411 100644
--- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
+++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
@@ -2785,7 +2785,6 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
const char *symbol_name_non_abi_mangled = NULL;
SectionSP symbol_section;
- uint32_t symbol_byte_size = 0;
bool add_nlist = true;
bool is_debug = ((nlist.n_type & N_STAB) != 0);
bool demangled_is_synthesized = false;
@@ -3437,61 +3436,6 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
if (symbol_section) {
const addr_t section_file_addr =
symbol_section->GetFileAddress();
- if (symbol_byte_size == 0 &&
- function_starts_count > 0) {
- addr_t symbol_lookup_file_addr = nlist.n_value;
- // Do an exact address match for non-ARM addresses,
- // else get the closest since the symbol might be a
- // thumb symbol which has an address with bit zero
- // set
- FunctionStarts::Entry *func_start_entry =
- function_starts.FindEntry(symbol_lookup_file_addr,
- !is_arm);
- if (is_arm && func_start_entry) {
- // Verify that the function start address is the
- // symbol address (ARM) or the symbol address + 1
- // (thumb)
- if (func_start_entry->addr !=
- symbol_lookup_file_addr &&
- func_start_entry->addr !=
- (symbol_lookup_file_addr + 1)) {
- // Not the right entry, NULL it out...
- func_start_entry = NULL;
- }
- }
- if (func_start_entry) {
- func_start_entry->data = true;
-
- addr_t symbol_file_addr = func_start_entry->addr;
- uint32_t symbol_flags = 0;
- if (is_arm) {
- if (symbol_file_addr & 1)
- symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB;
- symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
- }
-
- const FunctionStarts::Entry *next_func_start_entry =
- function_starts.FindNextEntry(func_start_entry);
- const addr_t section_end_file_addr =
- section_file_addr +
- symbol_section->GetByteSize();
- if (next_func_start_entry) {
- addr_t next_symbol_file_addr =
- next_func_start_entry->addr;
- // Be sure the clear the Thumb address bit when
- // we calculate the size from the current and
- // next address
- if (is_arm)
- next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
- symbol_byte_size = std::min<lldb::addr_t>(
- next_symbol_file_addr - symbol_file_addr,
- section_end_file_addr - symbol_file_addr);
- } else {
- symbol_byte_size =
- section_end_file_addr - symbol_file_addr;
- }
- }
- }
symbol_value -= section_file_addr;
}
@@ -3620,9 +3564,6 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
}
sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
- if (symbol_byte_size > 0)
- sym[sym_idx].SetByteSize(symbol_byte_size);
-
if (demangled_is_synthesized)
sym[sym_idx].SetDemangledNameIsSynthesized(true);
++sym_idx;
@@ -3711,7 +3652,6 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
SymbolType type = eSymbolTypeInvalid;
SectionSP symbol_section;
- lldb::addr_t symbol_byte_size = 0;
bool add_nlist = true;
bool is_gsym = false;
bool demangled_is_synthesized = false;
@@ -4297,47 +4237,6 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
if (symbol_section) {
const addr_t section_file_addr = symbol_section->GetFileAddress();
- if (symbol_byte_size == 0 && function_starts_count > 0) {
- addr_t symbol_lookup_file_addr = nlist.n_value;
- // Do an exact address match for non-ARM addresses, else get the
- // closest since the symbol might be a thumb symbol which has an
- // address with bit zero set.
- FunctionStarts::Entry *func_start_entry =
- function_starts.FindEntry(symbol_lookup_file_addr, !is_arm);
- if (is_arm && func_start_entry) {
- // Verify that the function start address is the symbol address
- // (ARM) or the symbol address + 1 (thumb).
- if (func_start_entry->addr != symbol_lookup_file_addr &&
- func_start_entry->addr != (symbol_lookup_file_addr + 1)) {
- // Not the right entry, NULL it out...
- func_start_entry = nullptr;
- }
- }
- if (func_start_entry) {
- func_start_entry->data = true;
-
- addr_t symbol_file_addr = func_start_entry->addr;
- if (is_arm)
- symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
-
- const FunctionStarts::Entry *next_func_start_entry =
- function_starts.FindNextEntry(func_start_entry);
- const addr_t section_end_file_addr =
- section_file_addr + symbol_section->GetByteSize();
- if (next_func_start_entry) {
- addr_t next_symbol_file_addr = next_func_start_entry->addr;
- // Be sure the clear the Thumb address bit when we calculate the
- // size from the current and next address
- if (is_arm)
- next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
- symbol_byte_size = std::min<lldb::addr_t>(
- next_symbol_file_addr - symbol_file_addr,
- section_end_file_addr - symbol_file_addr);
- } else {
- symbol_byte_size = section_end_file_addr - symbol_file_addr;
- }
- }
- }
symbol_value -= section_file_addr;
}
@@ -4444,9 +4343,6 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
if (nlist.n_desc & N_WEAK_REF)
sym[sym_idx].SetIsWeak(true);
- if (symbol_byte_size > 0)
- sym[sym_idx].SetByteSize(symbol_byte_size);
-
if (demangled_is_synthesized)
sym[sym_idx].SetDemangledNameIsSynthesized(true);
@@ -4565,23 +4461,7 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
Address symbol_addr;
if (module_sp->ResolveFileAddress(symbol_file_addr, symbol_addr)) {
SectionSP symbol_section(symbol_addr.GetSection());
- uint32_t symbol_byte_size = 0;
if (symbol_section) {
- const addr_t section_file_addr = symbol_section->GetFileAddress();
- const FunctionStarts::Entry *next_func_start_entry =
- function_starts.FindNextEntry(func_start_entry);
- const addr_t section_end_file_addr =
- section_file_addr + symbol_section->GetByteSize();
- if (next_func_start_entry) {
- addr_t next_symbol_file_addr = next_func_start_entry->addr;
- if (is_arm)
- next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
- symbol_byte_size = std::min<lldb::addr_t>(
- next_symbol_file_addr - symbol_file_addr,
- section_end_file_addr - symbol_file_addr);
- } else {
- symbol_byte_size = section_end_file_addr - symbol_file_addr;
- }
sym[sym_idx].SetID(synthetic_sym_id++);
// Don't set the name for any synthetic symbols, the Symbol
// object will generate one if needed when the name is accessed
@@ -4593,8 +4473,6 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
add_symbol_addr(symbol_addr.GetFileAddress());
if (symbol_flags)
sym[sym_idx].SetFlags(symbol_flags);
- if (symbol_byte_size)
- sym[sym_idx].SetByteSize(symbol_byte_size);
++sym_idx;
}
}
More information about the lldb-commits
mailing list