[Lldb-commits] [lldb] [LLDB] Fix potential data race in Symtab initialization (PR #192753)

Adrian Prantl via lldb-commits lldb-commits at lists.llvm.org
Sat Apr 18 09:53:45 PDT 2026


https://github.com/adrian-prantl updated https://github.com/llvm/llvm-project/pull/192753

>From b9e13b131918d265fb782431527b6bdae069c298 Mon Sep 17 00:00:00 2001
From: Adrian Prantl <aprantl at apple.com>
Date: Fri, 17 Apr 2026 16:00:49 -0700
Subject: [PATCH] [LLDB] Fix potential data race in Symtab initialization

Claude pointed out to me that Symtab::FindFunctionSymbols doesn't lock
the mutex before checking m_name_indexes_computed and recomputing
it. On top of that all the initialization flags are bitfields, which
makes any unguarded concurrent accesses UB. Changing them to bools
should no longer be necessary after introducing a lock, but several of
the public methods trust that their caller holds the lock so I'm
opting to remove this footgun just in case.

rdar://174988238
---
 lldb/include/lldb/Symbol/Symtab.h |  10 +-
 lldb/source/Symbol/Symtab.cpp     | 214 +++++++++++++++---------------
 2 files changed, 112 insertions(+), 112 deletions(-)

diff --git a/lldb/include/lldb/Symbol/Symtab.h b/lldb/include/lldb/Symbol/Symtab.h
index 57627d2dde7d2..9c7173c1153a3 100644
--- a/lldb/include/lldb/Symbol/Symtab.h
+++ b/lldb/include/lldb/Symbol/Symtab.h
@@ -277,10 +277,12 @@ class Symtab {
   /// Maps function names to symbol indices (grouped by FunctionNameTypes)
   std::map<lldb::FunctionNameType, UniqueCStringMap<uint32_t>>
       m_name_to_symbol_indices;
-  mutable std::recursive_mutex
-      m_mutex; // Provide thread safety for this symbol table
-  bool m_file_addr_to_index_computed : 1, m_name_indexes_computed : 1,
-    m_loaded_from_cache : 1, m_saved_to_cache : 1;
+  /// Provide thread safety for this symbol table.
+  mutable std::recursive_mutex m_mutex;
+  bool m_file_addr_to_index_computed = false;
+  bool m_name_indexes_computed = false;
+  bool m_loaded_from_cache = false;
+  bool m_saved_to_cache = false;
 
 private:
   UniqueCStringMap<uint32_t> &
diff --git a/lldb/source/Symbol/Symtab.cpp b/lldb/source/Symbol/Symtab.cpp
index c7cd900f41cbb..21890d8a125cb 100644
--- a/lldb/source/Symbol/Symtab.cpp
+++ b/lldb/source/Symbol/Symtab.cpp
@@ -280,116 +280,117 @@ static bool lldb_skip_name(llvm::StringRef mangled,
 
 void Symtab::InitNameIndexes() {
   // Protected function, no need to lock mutex...
-  if (!m_name_indexes_computed) {
-    m_name_indexes_computed = true;
-    ElapsedTime elapsed(m_objfile->GetModule()->GetSymtabIndexTime());
-    LLDB_SCOPED_TIMER();
+  if (m_name_indexes_computed)
+    return;
 
-    // Collect all loaded language plugins.
-    std::vector<Language *> languages;
-    Language::ForEach([&languages](Language *l) {
-      languages.push_back(l);
-      return IterationAction::Continue;
-    });
+  m_name_indexes_computed = true;
+  ElapsedTime elapsed(m_objfile->GetModule()->GetSymtabIndexTime());
+  LLDB_SCOPED_TIMER();
 
-    auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone);
-    auto &basename_to_index =
-        GetNameToSymbolIndexMap(lldb::eFunctionNameTypeBase);
-    auto &method_to_index =
-        GetNameToSymbolIndexMap(lldb::eFunctionNameTypeMethod);
-    auto &selector_to_index =
-        GetNameToSymbolIndexMap(lldb::eFunctionNameTypeSelector);
-    // Create the name index vector to be able to quickly search by name
-    const size_t num_symbols = m_symbols.size();
-    name_to_index.Reserve(num_symbols);
-
-    // The "const char *" in "class_contexts" and backlog::value_type::second
-    // must come from a ConstString::GetCString()
-    std::set<const char *> class_contexts;
-    std::vector<std::pair<NameToIndexMap::Entry, const char *>> backlog;
-    backlog.reserve(num_symbols / 2);
-
-    // Instantiation of the demangler is expensive, so better use a single one
-    // for all entries during batch processing.
-    RichManglingContext rmc;
-    for (uint32_t value = 0; value < num_symbols; ++value) {
-      Symbol *symbol = &m_symbols[value];
-
-      // Don't let trampolines get into the lookup by name map If we ever need
-      // the trampoline symbols to be searchable by name we can remove this and
-      // then possibly add a new bool to any of the Symtab functions that
-      // lookup symbols by name to indicate if they want trampolines. We also
-      // don't want any synthetic symbols with auto generated names in the
-      // name lookups.
-      if (symbol->IsTrampoline() || symbol->IsSyntheticWithAutoGeneratedName())
-        continue;
+  // Collect all loaded language plugins.
+  std::vector<Language *> languages;
+  Language::ForEach([&languages](Language *l) {
+    languages.push_back(l);
+    return IterationAction::Continue;
+  });
 
-      // If the symbol's name string matched a Mangled::ManglingScheme, it is
-      // stored in the mangled field.
-      Mangled &mangled = symbol->GetMangled();
-      if (ConstString name = mangled.GetMangledName()) {
-        name_to_index.Append(name, value);
+  auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone);
+  auto &basename_to_index =
+      GetNameToSymbolIndexMap(lldb::eFunctionNameTypeBase);
+  auto &method_to_index =
+      GetNameToSymbolIndexMap(lldb::eFunctionNameTypeMethod);
+  auto &selector_to_index =
+      GetNameToSymbolIndexMap(lldb::eFunctionNameTypeSelector);
+  // Create the name index vector to be able to quickly search by name
+  const size_t num_symbols = m_symbols.size();
+  name_to_index.Reserve(num_symbols);
+
+  // The "const char *" in "class_contexts" and backlog::value_type::second
+  // must come from a ConstString::GetCString()
+  std::set<const char *> class_contexts;
+  std::vector<std::pair<NameToIndexMap::Entry, const char *>> backlog;
+  backlog.reserve(num_symbols / 2);
+
+  // Instantiation of the demangler is expensive, so better use a single one
+  // for all entries during batch processing.
+  RichManglingContext rmc;
+  for (uint32_t value = 0; value < num_symbols; ++value) {
+    Symbol *symbol = &m_symbols[value];
+
+    // Don't let trampolines get into the lookup by name map If we ever need
+    // the trampoline symbols to be searchable by name we can remove this and
+    // then possibly add a new bool to any of the Symtab functions that
+    // lookup symbols by name to indicate if they want trampolines. We also
+    // don't want any synthetic symbols with auto generated names in the
+    // name lookups.
+    if (symbol->IsTrampoline() || symbol->IsSyntheticWithAutoGeneratedName())
+      continue;
 
-        if (symbol->ContainsLinkerAnnotations()) {
-          // If the symbol has linker annotations, also add the version without
-          // the annotations.
-          ConstString stripped = ConstString(
-              m_objfile->StripLinkerSymbolAnnotations(name.GetStringRef()));
-          name_to_index.Append(stripped, value);
-        }
+    // If the symbol's name string matched a Mangled::ManglingScheme, it is
+    // stored in the mangled field.
+    Mangled &mangled = symbol->GetMangled();
+    if (ConstString name = mangled.GetMangledName()) {
+      name_to_index.Append(name, value);
+
+      if (symbol->ContainsLinkerAnnotations()) {
+        // If the symbol has linker annotations, also add the version without
+        // the annotations.
+        ConstString stripped = ConstString(
+            m_objfile->StripLinkerSymbolAnnotations(name.GetStringRef()));
+        name_to_index.Append(stripped, value);
+      }
 
-        const SymbolType type = symbol->GetType();
-        if (type == eSymbolTypeCode || type == eSymbolTypeResolver) {
-          if (mangled.GetRichManglingInfo(rmc, lldb_skip_name)) {
-            RegisterMangledNameEntry(value, class_contexts, backlog, rmc);
-            continue;
-          }
+      const SymbolType type = symbol->GetType();
+      if (type == eSymbolTypeCode || type == eSymbolTypeResolver) {
+        if (mangled.GetRichManglingInfo(rmc, lldb_skip_name)) {
+          RegisterMangledNameEntry(value, class_contexts, backlog, rmc);
+          continue;
         }
       }
+    }
 
-      // Symbol name strings that didn't match a Mangled::ManglingScheme, are
-      // stored in the demangled field.
-      if (ConstString name = mangled.GetDemangledName()) {
-        name_to_index.Append(name, value);
+    // Symbol name strings that didn't match a Mangled::ManglingScheme, are
+    // stored in the demangled field.
+    if (ConstString name = mangled.GetDemangledName()) {
+      name_to_index.Append(name, value);
 
-        if (symbol->ContainsLinkerAnnotations()) {
-          // If the symbol has linker annotations, also add the version without
-          // the annotations.
-          name = ConstString(
-              m_objfile->StripLinkerSymbolAnnotations(name.GetStringRef()));
-          name_to_index.Append(name, value);
-        }
+      if (symbol->ContainsLinkerAnnotations()) {
+        // If the symbol has linker annotations, also add the version without
+        // the annotations.
+        name = ConstString(
+            m_objfile->StripLinkerSymbolAnnotations(name.GetStringRef()));
+        name_to_index.Append(name, value);
+      }
 
-        // If the demangled name turns out to be an ObjC name, and is a category
-        // name, add the version without categories to the index too.
-        for (Language *lang : languages) {
-          for (auto variant : lang->GetMethodNameVariants(name)) {
-            if (variant.GetType() & lldb::eFunctionNameTypeSelector)
-              selector_to_index.Append(variant.GetName(), value);
-            else if (variant.GetType() & lldb::eFunctionNameTypeFull)
-              name_to_index.Append(variant.GetName(), value);
-            else if (variant.GetType() & lldb::eFunctionNameTypeMethod)
-              method_to_index.Append(variant.GetName(), value);
-            else if (variant.GetType() & lldb::eFunctionNameTypeBase)
-              basename_to_index.Append(variant.GetName(), value);
-          }
+      // If the demangled name turns out to be an ObjC name, and is a category
+      // name, add the version without categories to the index too.
+      for (Language *lang : languages) {
+        for (auto variant : lang->GetMethodNameVariants(name)) {
+          if (variant.GetType() & lldb::eFunctionNameTypeSelector)
+            selector_to_index.Append(variant.GetName(), value);
+          else if (variant.GetType() & lldb::eFunctionNameTypeFull)
+            name_to_index.Append(variant.GetName(), value);
+          else if (variant.GetType() & lldb::eFunctionNameTypeMethod)
+            method_to_index.Append(variant.GetName(), value);
+          else if (variant.GetType() & lldb::eFunctionNameTypeBase)
+            basename_to_index.Append(variant.GetName(), value);
         }
       }
     }
+  }
 
-    for (const auto &record : backlog) {
-      RegisterBacklogEntry(record.first, record.second, class_contexts);
-    }
-
-    name_to_index.Sort();
-    name_to_index.SizeToFit();
-    selector_to_index.Sort();
-    selector_to_index.SizeToFit();
-    basename_to_index.Sort();
-    basename_to_index.SizeToFit();
-    method_to_index.Sort();
-    method_to_index.SizeToFit();
+  for (const auto &record : backlog) {
+    RegisterBacklogEntry(record.first, record.second, class_contexts);
   }
+
+  name_to_index.Sort();
+  name_to_index.SizeToFit();
+  selector_to_index.Sort();
+  selector_to_index.SizeToFit();
+  basename_to_index.Sort();
+  basename_to_index.SizeToFit();
+  method_to_index.Sort();
+  method_to_index.SizeToFit();
 }
 
 void Symtab::RegisterMangledNameEntry(
@@ -684,8 +685,7 @@ uint32_t Symtab::AppendSymbolIndexesWithName(ConstString symbol_name,
   std::lock_guard<std::recursive_mutex> guard(m_mutex);
 
   if (symbol_name) {
-    if (!m_name_indexes_computed)
-      InitNameIndexes();
+    InitNameIndexes();
 
     return GetNameIndexes(symbol_name, indexes);
   }
@@ -701,8 +701,7 @@ uint32_t Symtab::AppendSymbolIndexesWithName(ConstString symbol_name,
   LLDB_SCOPED_TIMER();
   if (symbol_name) {
     const size_t old_size = indexes.size();
-    if (!m_name_indexes_computed)
-      InitNameIndexes();
+    InitNameIndexes();
 
     std::vector<uint32_t> all_name_indexes;
     const size_t name_match_count =
@@ -823,8 +822,7 @@ Symtab::FindAllSymbolsWithNameAndType(ConstString name,
 
   // Initialize all of the lookup by name indexes before converting NAME to a
   // uniqued string NAME_STR below.
-  if (!m_name_indexes_computed)
-    InitNameIndexes();
+  InitNameIndexes();
 
   if (name) {
     // The string table did have a string that matched, but we need to check
@@ -841,8 +839,7 @@ void Symtab::FindAllSymbolsWithNameAndType(
   LLDB_SCOPED_TIMER();
   // Initialize all of the lookup by name indexes before converting NAME to a
   // uniqued string NAME_STR below.
-  if (!m_name_indexes_computed)
-    InitNameIndexes();
+  InitNameIndexes();
 
   if (name) {
     // The string table did have a string that matched, but we need to check
@@ -870,8 +867,7 @@ Symbol *Symtab::FindFirstSymbolWithNameAndType(ConstString name,
                                                Visibility symbol_visibility) {
   std::lock_guard<std::recursive_mutex> guard(m_mutex);
   LLDB_SCOPED_TIMER();
-  if (!m_name_indexes_computed)
-    InitNameIndexes();
+  InitNameIndexes();
 
   if (name) {
     std::vector<uint32_t> matching_indexes;
@@ -1126,8 +1122,8 @@ void Symtab::FindFunctionSymbols(ConstString name, uint32_t name_type_mask,
     }
   }
 
-  if (!m_name_indexes_computed)
-    InitNameIndexes();
+  std::lock_guard<std::recursive_mutex> guard(m_mutex);
+  InitNameIndexes();
 
   for (lldb::FunctionNameType type :
        {lldb::eFunctionNameTypeBase, lldb::eFunctionNameTypeMethod,
@@ -1178,7 +1174,9 @@ void Symtab::SaveToCache() {
   DataFileCache *cache = Module::GetIndexCache();
   if (!cache)
     return; // Caching is not enabled.
-  InitNameIndexes(); // Init the name indexes so we can cache them as well.
+
+  // Init the name indexes so we can cache them as well.
+  InitNameIndexes();
   const auto byte_order = endian::InlHostByteOrder();
   DataEncoder file(byte_order, /*addr_size=*/8);
   // Encode will return false if the symbol table's object file doesn't have



More information about the lldb-commits mailing list