[Lldb-commits] [lldb] update ManualDWARFIndex::Index to use std::once (PR #165896)

Fri Oct 31 11:10:51 PDT 2025

https://github.com/zhyty created https://github.com/llvm/llvm-project/pull/165896

Small change to use (what I think is) a better practice -- we were using the `m_indexed` bool member to make sure we called `Index()` once, but we should just use `std::once`! This change shouldn't affect functionality. 

This change may also make concurrent access to `Index()` thread-safe, though the ManualDWARFIndex API isn't completely thread-safe due to `Decode()`. I'm not sure if ManualDWARFIndex was ever intended to be thread-safe.

Test Plan:

`ninja check-lldb`

Tested basic debugging workflow of a couple of large projects I had built. Basically:
```
(lldb) target create <project>
(lldb) b main
(lldb) r
(lldb) step
... 
```

I A/B tested the performance of launching several modules with parallel module loading and didn't observe any performance regressions.

>From fa605ef0b0d1a5c2e3fb232b740311dddf5ee226 Mon Sep 17 00:00:00 2001
From: Tom Yang <toyang at fb.com>
Date: Tue, 14 Oct 2025 00:09:38 -0700
Subject: [PATCH 1/2] update ManualDWARFIndex::Index to use std::once

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:
---
 .../SymbolFile/DWARF/ManualDWARFIndex.cpp     | 207 +++++++++---------
 .../SymbolFile/DWARF/ManualDWARFIndex.h       |   2 +-
 2 files changed, 106 insertions(+), 103 deletions(-)

diff --git a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp
index d90108f687f84..ec225f446c45c 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp
@@ -33,118 +33,121 @@ using namespace lldb_private::plugin::dwarf;
 using namespace llvm::dwarf;
 
 void ManualDWARFIndex::Index() {
-  if (m_indexed)
-    return;
-  m_indexed = true;
-
-  ElapsedTime elapsed(m_index_time);
-  LLDB_SCOPED_TIMERF("%p", static_cast<void *>(m_dwarf));
-  if (LoadFromCache()) {
-    m_dwarf->SetDebugInfoIndexWasLoadedFromCache();
-    return;
-  }
+  // TODO(toyang): wrapping it in the call_once isn't really thread-safe either.
+  // Though, if index callers always call index() before any read/write
+  // operation (and would therefore wait here), maybe it's ok?
+  // TODO(toyang): we wrap this logic in the call_once, but maybe it's better if
+  // it's a separate function that we call instead?
+  std::call_once(m_indexed_flag, [this]() {
+    ElapsedTime elapsed(m_index_time);
+    LLDB_SCOPED_TIMERF("%p", static_cast<void *>(m_dwarf));
+    if (LoadFromCache()) {
+      m_dwarf->SetDebugInfoIndexWasLoadedFromCache();
+      return;
+    }
 
-  DWARFDebugInfo &main_info = m_dwarf->DebugInfo();
-  SymbolFileDWARFDwo *dwp_dwarf = m_dwarf->GetDwpSymbolFile().get();
-  DWARFDebugInfo *dwp_info = dwp_dwarf ? &dwp_dwarf->DebugInfo() : nullptr;
-
-  std::vector<DWARFUnit *> units_to_index;
-  units_to_index.reserve(main_info.GetNumUnits() +
-                         (dwp_info ? dwp_info->GetNumUnits() : 0));
-
-  // Process all units in the main file, as well as any type units in the dwp
-  // file. Type units in dwo files are handled when we reach the dwo file in
-  // IndexUnit.
-  for (size_t U = 0; U < main_info.GetNumUnits(); ++U) {
-    DWARFUnit *unit = main_info.GetUnitAtIndex(U);
-    if (unit && m_units_to_avoid.count(unit->GetOffset()) == 0)
-      units_to_index.push_back(unit);
-  }
-  if (dwp_info && dwp_info->ContainsTypeUnits()) {
-    for (size_t U = 0; U < dwp_info->GetNumUnits(); ++U) {
-      if (auto *tu =
-              llvm::dyn_cast<DWARFTypeUnit>(dwp_info->GetUnitAtIndex(U))) {
-        if (!m_type_sigs_to_avoid.contains(tu->GetTypeHash()))
-          units_to_index.push_back(tu);
+    DWARFDebugInfo &main_info = m_dwarf->DebugInfo();
+    SymbolFileDWARFDwo *dwp_dwarf = m_dwarf->GetDwpSymbolFile().get();
+    DWARFDebugInfo *dwp_info = dwp_dwarf ? &dwp_dwarf->DebugInfo() : nullptr;
+
+    std::vector<DWARFUnit *> units_to_index;
+    units_to_index.reserve(main_info.GetNumUnits() +
+                           (dwp_info ? dwp_info->GetNumUnits() : 0));
+
+    // Process all units in the main file, as well as any type units in the dwp
+    // file. Type units in dwo files are handled when we reach the dwo file in
+    // IndexUnit.
+    for (size_t U = 0; U < main_info.GetNumUnits(); ++U) {
+      DWARFUnit *unit = main_info.GetUnitAtIndex(U);
+      if (unit && m_units_to_avoid.count(unit->GetOffset()) == 0)
+        units_to_index.push_back(unit);
+    }
+    if (dwp_info && dwp_info->ContainsTypeUnits()) {
+      for (size_t U = 0; U < dwp_info->GetNumUnits(); ++U) {
+        if (auto *tu =
+                llvm::dyn_cast<DWARFTypeUnit>(dwp_info->GetUnitAtIndex(U))) {
+          if (!m_type_sigs_to_avoid.contains(tu->GetTypeHash()))
+            units_to_index.push_back(tu);
+        }
       }
     }
-  }
 
-  if (units_to_index.empty())
-    return;
-
-  StreamString module_desc;
-  m_module.GetDescription(module_desc.AsRawOstream(),
-                          lldb::eDescriptionLevelBrief);
-
-  // Include 2 passes per unit to index for extracting DIEs from the unit and
-  // indexing the unit, and then extra entries for finalizing each index in the
-  // set.
-  const auto indices = IndexSet<NameToDIE>::Indices();
-  const uint64_t total_progress = units_to_index.size() * 2 + indices.size();
-  Progress progress("Manually indexing DWARF", module_desc.GetData(),
-                    total_progress, /*debugger=*/nullptr,
-                    Progress::kDefaultHighFrequencyReportTime);
-
-  // Share one thread pool across operations to avoid the overhead of
-  // recreating the threads.
-  llvm::ThreadPoolTaskGroup task_group(Debugger::GetThreadPool());
-  const size_t num_threads = Debugger::GetThreadPool().getMaxConcurrency();
-
-  // Run a function for each compile unit in parallel using as many threads as
-  // are available. This is significantly faster than submiting a new task for
-  // each unit.
-  auto for_each_unit = [&](auto &&fn) {
-    std::atomic<size_t> next_cu_idx = 0;
-    auto wrapper = [&fn, &next_cu_idx, &units_to_index,
-                    &progress](size_t worker_id) {
-      size_t cu_idx;
-      while ((cu_idx = next_cu_idx.fetch_add(1, std::memory_order_relaxed)) <
-             units_to_index.size()) {
-        fn(worker_id, cu_idx, units_to_index[cu_idx]);
-        progress.Increment();
-      }
-    };
+    if (units_to_index.empty())
+      return;
 
-    for (size_t i = 0; i < num_threads; ++i)
-      task_group.async(wrapper, i);
+    StreamString module_desc;
+    m_module.GetDescription(module_desc.AsRawOstream(),
+                            lldb::eDescriptionLevelBrief);
+
+    // Include 2 passes per unit to index for extracting DIEs from the unit and
+    // indexing the unit, and then extra entries for finalizing each index in
+    // the set.
+    const auto indices = IndexSet<NameToDIE>::Indices();
+    const uint64_t total_progress = units_to_index.size() * 2 + indices.size();
+    Progress progress("Manually indexing DWARF", module_desc.GetData(),
+                      total_progress, /*debugger=*/nullptr,
+                      Progress::kDefaultHighFrequencyReportTime);
+
+    // Share one thread pool across operations to avoid the overhead of
+    // recreating the threads.
+    llvm::ThreadPoolTaskGroup task_group(Debugger::GetThreadPool());
+    const size_t num_threads = Debugger::GetThreadPool().getMaxConcurrency();
+
+    // Run a function for each compile unit in parallel using as many threads as
+    // are available. This is significantly faster than submiting a new task for
+    // each unit.
+    auto for_each_unit = [&](auto &&fn) {
+      std::atomic<size_t> next_cu_idx = 0;
+      auto wrapper = [&fn, &next_cu_idx, &units_to_index,
+                      &progress](size_t worker_id) {
+        size_t cu_idx;
+        while ((cu_idx = next_cu_idx.fetch_add(1, std::memory_order_relaxed)) <
+               units_to_index.size()) {
+          fn(worker_id, cu_idx, units_to_index[cu_idx]);
+          progress.Increment();
+        }
+      };
 
-    task_group.wait();
-  };
-
-  // Extract dies for all DWARFs unit in parallel.  Figure out which units
-  // didn't have their DIEs already parsed and remember this.  If no DIEs were
-  // parsed prior to this index function call, we are going to want to clear the
-  // CU dies after we are done indexing to make sure we don't pull in all DWARF
-  // dies, but we need to wait until all units have been indexed in case a DIE
-  // in one unit refers to another and the indexes accesses those DIEs.
-  std::vector<std::optional<DWARFUnit::ScopedExtractDIEs>> clear_cu_dies(
-      units_to_index.size());
-  for_each_unit([&clear_cu_dies](size_t, size_t idx, DWARFUnit *unit) {
-    clear_cu_dies[idx] = unit->ExtractDIEsScoped();
-  });
+      for (size_t i = 0; i < num_threads; ++i)
+        task_group.async(wrapper, i);
 
-  // Now index all DWARF unit in parallel.
-  std::vector<IndexSet<NameToDIE>> sets(num_threads);
-  for_each_unit(
-      [this, dwp_dwarf, &sets](size_t worker_id, size_t, DWARFUnit *unit) {
-        IndexUnit(*unit, dwp_dwarf, sets[worker_id]);
-      });
+      task_group.wait();
+    };
 
-  // Merge partial indexes into a single index. Process each index in a set in
-  // parallel.
-  for (NameToDIE IndexSet<NameToDIE>::*index : indices) {
-    task_group.async([this, &sets, index, &progress]() {
-      NameToDIE &result = m_set.*index;
-      for (auto &set : sets)
-        result.Append(set.*index);
-      result.Finalize();
-      progress.Increment();
+    // Extract dies for all DWARFs unit in parallel.  Figure out which units
+    // didn't have their DIEs already parsed and remember this.  If no DIEs were
+    // parsed prior to this index function call, we are going to want to clear
+    // the CU dies after we are done indexing to make sure we don't pull in all
+    // DWARF dies, but we need to wait until all units have been indexed in case
+    // a DIE in one unit refers to another and the indexes accesses those DIEs.
+    std::vector<std::optional<DWARFUnit::ScopedExtractDIEs>> clear_cu_dies(
+        units_to_index.size());
+    for_each_unit([&clear_cu_dies](size_t, size_t idx, DWARFUnit *unit) {
+      clear_cu_dies[idx] = unit->ExtractDIEsScoped();
     });
-  }
-  task_group.wait();
 
-  SaveToCache();
+    // Now index all DWARF unit in parallel.
+    std::vector<IndexSet<NameToDIE>> sets(num_threads);
+    for_each_unit(
+        [this, dwp_dwarf, &sets](size_t worker_id, size_t, DWARFUnit *unit) {
+          IndexUnit(*unit, dwp_dwarf, sets[worker_id]);
+        });
+
+    // Merge partial indexes into a single index. Process each index in a set in
+    // parallel.
+    for (NameToDIE IndexSet<NameToDIE>::*index : indices) {
+      task_group.async([this, &sets, index, &progress]() {
+        NameToDIE &result = m_set.*index;
+        for (auto &set : sets)
+          result.Append(set.*index);
+        result.Finalize();
+        progress.Increment();
+      });
+    }
+    task_group.wait();
+
+    SaveToCache();
+  });
 }
 
 void ManualDWARFIndex::IndexUnit(DWARFUnit &unit, SymbolFileDWARFDwo *dwp,
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h
index 0b5b2f3e84309..f7d45b5a24990 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h
@@ -170,7 +170,7 @@ class ManualDWARFIndex : public DWARFIndex {
   llvm::DenseSet<uint64_t> m_type_sigs_to_avoid;
 
   IndexSet<NameToDIE> m_set;
-  bool m_indexed = false;
+  std::once_flag m_indexed_flag;
 };
 } // namespace dwarf
 } // namespace lldb_private::plugin

>From bf2c75613fc6b62d6ca064d7604eeae20f18a6ec Mon Sep 17 00:00:00 2001
From: Tom Yang <toyang at fb.com>
Date: Fri, 31 Oct 2025 10:59:21 -0700
Subject: [PATCH 2/2] remove TODO comment

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:
---
 lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp
index ec225f446c45c..aa30682faed90 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp
@@ -22,7 +22,6 @@
 #include "lldb/Utility/Stream.h"
 #include "lldb/Utility/Timer.h"
 #include "lldb/lldb-private-enumerations.h"
-#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/ThreadPool.h"
 #include <atomic>
 #include <optional>
@@ -33,11 +32,6 @@ using namespace lldb_private::plugin::dwarf;
 using namespace llvm::dwarf;
 
 void ManualDWARFIndex::Index() {
-  // TODO(toyang): wrapping it in the call_once isn't really thread-safe either.
-  // Though, if index callers always call index() before any read/write
-  // operation (and would therefore wait here), maybe it's ok?
-  // TODO(toyang): we wrap this logic in the call_once, but maybe it's better if
-  // it's a separate function that we call instead?
   std::call_once(m_indexed_flag, [this]() {
     ElapsedTime elapsed(m_index_time);
     LLDB_SCOPED_TIMERF("%p", static_cast<void *>(m_dwarf));