[Lldb-commits] [lldb] [WIP][RFC][lldb] faster indexing (PR #122128)
Pavel Labath via lldb-commits
lldb-commits at lists.llvm.org
Wed Jan 15 08:53:16 PST 2025
https://github.com/labath updated https://github.com/llvm/llvm-project/pull/122128
>From f6aee6ad61745e20079d7d56a643dc61a49132b8 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel at labath.sk>
Date: Wed, 8 Jan 2025 15:27:30 +0000
Subject: [PATCH] faster indexing
---
.../Plugins/SymbolFile/DWARF/DWARFUnit.h | 1 -
.../SymbolFile/DWARF/ManualDWARFIndex.cpp | 282 ++++++-----------
.../SymbolFile/DWARF/ManualDWARFIndex.h | 24 +-
.../Plugins/SymbolFile/DWARF/NameToDIE.cpp | 135 +++-----
.../Plugins/SymbolFile/DWARF/NameToDIE.h | 81 ++++-
.../unittests/SymbolFile/DWARF/CMakeLists.txt | 1 -
.../DWARF/DWARFIndexCachingTest.cpp | 292 ------------------
7 files changed, 222 insertions(+), 594 deletions(-)
delete mode 100644 lldb/unittests/SymbolFile/DWARF/DWARFIndexCachingTest.cpp
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
index ba142ae86fe0e5..da419169e25395 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
@@ -24,7 +24,6 @@ namespace lldb_private::plugin {
namespace dwarf {
class DWARFUnit;
class DWARFCompileUnit;
-class NameToDIE;
class SymbolFileDWARF;
class SymbolFileDWARFDwo;
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp
index 6f2c45e74132c1..1e180e3d296e2b 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp
@@ -21,10 +21,11 @@
#include "lldb/Utility/DataExtractor.h"
#include "lldb/Utility/Stream.h"
#include "lldb/Utility/Timer.h"
-#include "llvm/Support/FormatVariadic.h"
+#include "Plugins/SymbolFile/DWARF/NameToDIE.h"
#include "llvm/Support/ThreadPool.h"
#include <atomic>
#include <optional>
+#include <stdbool.h>
using namespace lldb_private;
using namespace lldb;
@@ -37,7 +38,6 @@ void ManualDWARFIndex::Index() {
m_indexed = true;
ElapsedTime elapsed(m_index_time);
- LLDB_SCOPED_TIMERF("%p", static_cast<void *>(m_dwarf));
if (LoadFromCache()) {
m_dwarf->SetDebugInfoIndexWasLoadedFromCache();
return;
@@ -91,16 +91,15 @@ void ManualDWARFIndex::Index() {
// Run a function for each compile unit in parallel using as many threads as
// are available. This is significantly faster than submiting a new task for
// each unit.
- auto for_each_unit = [&](auto &&fn) {
+ auto for_each_unit = [&](auto &&fn, auto &&reduce) {
std::atomic<size_t> next_cu_idx = 0;
- auto wrapper = [&fn, &next_cu_idx, &units_to_index,
- &progress](size_t worker_id) {
+ auto wrapper = [&fn, &next_cu_idx, &units_to_index, &reduce](size_t worker_id) {
size_t cu_idx;
while ((cu_idx = next_cu_idx.fetch_add(1, std::memory_order_relaxed)) <
units_to_index.size()) {
fn(worker_id, cu_idx, units_to_index[cu_idx]);
- progress.Increment();
}
+ reduce(worker_id);
};
for (size_t i = 0; i < num_threads; ++i)
@@ -109,6 +108,10 @@ void ManualDWARFIndex::Index() {
task_group.wait();
};
+#define TTT(x) \
+ static ::lldb_private::Timer::Category _cat(x); \
+ ::lldb_private::Timer _scoped_timer(_cat, "")
+
// Extract dies for all DWARFs unit in parallel. Figure out which units
// didn't have their DIEs already parsed and remember this. If no DIEs were
// parsed prior to this index function call, we are going to want to clear the
@@ -117,42 +120,92 @@ void ManualDWARFIndex::Index() {
// in one unit refers to another and the indexes accesses those DIEs.
std::vector<std::optional<DWARFUnit::ScopedExtractDIEs>> clear_cu_dies(
units_to_index.size());
- for_each_unit([&clear_cu_dies](size_t, size_t idx, DWARFUnit *unit) {
- clear_cu_dies[idx] = unit->ExtractDIEsScoped();
- });
+ {
+ TTT("INDEX - EXTRACT");
+ for_each_unit(
+ [&clear_cu_dies](size_t, size_t idx, DWARFUnit *unit) {
+ clear_cu_dies[idx] = unit->ExtractDIEsScoped();
+ },
+ [](size_t) {});
+ }
// Now index all DWARF unit in parallel.
- std::vector<IndexSet> sets(num_threads);
- for_each_unit(
- [this, dwp_dwarf, &sets](size_t worker_id, size_t, DWARFUnit *unit) {
- IndexUnit(*unit, dwp_dwarf, sets[worker_id]);
- });
-
- // Merge partial indexes into a single index. Process each index in a set in
- // parallel.
- auto finalize_fn = [this, &sets, &progress](NameToDIE(IndexSet::*index)) {
- NameToDIE &result = m_set.*index;
- for (auto &set : sets)
- result.Append(set.*index);
- result.Finalize();
- progress.Increment();
- };
+ std::vector<IndexSet<NameToDIEX>> sets(num_threads);
+ {
+ TTT("INDEX - INDEX");
+
+ for_each_unit(
+ [this, dwp_dwarf, &sets](size_t worker_id, size_t, DWARFUnit *unit) {
+ IndexUnit(*unit, dwp_dwarf, sets[worker_id]);
+ },
+ [&sets](size_t worker_id) {
+ auto &set = sets[worker_id];
+ set.function_basenames.Finalize();
+ set.function_fullnames.Finalize();
+ set.function_methods.Finalize();
+ set.function_selectors.Finalize();
+ set.objc_class_selectors.Finalize();
+ set.globals.Finalize();
+ set.types.Finalize();
+ set.namespaces.Finalize();
+ });
+ }
- task_group.async(finalize_fn, &IndexSet::function_basenames);
- task_group.async(finalize_fn, &IndexSet::function_fullnames);
- task_group.async(finalize_fn, &IndexSet::function_methods);
- task_group.async(finalize_fn, &IndexSet::function_selectors);
- task_group.async(finalize_fn, &IndexSet::objc_class_selectors);
- task_group.async(finalize_fn, &IndexSet::globals);
- task_group.async(finalize_fn, &IndexSet::types);
- task_group.async(finalize_fn, &IndexSet::namespaces);
- task_group.wait();
+#define FINALIZE_FN(index, slice) \
+ ([this, slice, &sets, &progress] { \
+ auto &result = this->m_set.index; \
+ size_t count = 0; \
+ for (auto &set : sets) \
+ count += (slice + 1 < set.index.m_starts.size() \
+ ? set.index.m_starts[slice + 1] \
+ : set.index.m_map.end()) - \
+ set.index.m_map.begin(); \
+ result.Reserve(count, slice); \
+ for (auto &set : sets) \
+ result.Append(set.index, slice); \
+ progress.Increment(); \
+ })
+ {
+ TTT("INDEX - APPEND");
+ for (unsigned slice = 0; slice < 4; ++slice) {
+ task_group.async(FINALIZE_FN(function_basenames, slice));
+ task_group.async(FINALIZE_FN(function_fullnames, slice));
+ task_group.async(FINALIZE_FN(function_methods, slice));
+ task_group.async(FINALIZE_FN(function_selectors, slice));
+ task_group.async(FINALIZE_FN(objc_class_selectors, slice));
+ task_group.async(FINALIZE_FN(globals, slice));
+ task_group.async(FINALIZE_FN(types, slice));
+ task_group.async(FINALIZE_FN(namespaces, slice));
+ }
+ task_group.wait();
+ }
+
+#undef FINALIZE_FN
+#define FINALIZE_FN(index, slice) \
+ ([this, &progress, slice] { \
+ this->m_set.index.Finalize(slice); \
+ progress.Increment(); \
+ })
+ {
+ TTT("INDEX - FINALIZE");
+ for (unsigned slice = 0; slice < 4; ++slice) {
+ task_group.async(FINALIZE_FN(function_basenames, slice));
+ task_group.async(FINALIZE_FN(function_fullnames, slice));
+ task_group.async(FINALIZE_FN(function_methods, slice));
+ task_group.async(FINALIZE_FN(function_selectors, slice));
+ task_group.async(FINALIZE_FN(objc_class_selectors, slice));
+ task_group.async(FINALIZE_FN(globals, slice));
+ task_group.async(FINALIZE_FN(types, slice));
+ task_group.async(FINALIZE_FN(namespaces, slice));
+ }
+ task_group.wait();
+ }
SaveToCache();
}
void ManualDWARFIndex::IndexUnit(DWARFUnit &unit, SymbolFileDWARFDwo *dwp,
- IndexSet &set) {
+ IndexSet<NameToDIEX> &set) {
Log *log = GetLog(DWARFLog::Lookups);
if (log) {
@@ -210,7 +263,7 @@ void ManualDWARFIndex::IndexUnit(DWARFUnit &unit, SymbolFileDWARFDwo *dwp,
void ManualDWARFIndex::IndexUnitImpl(DWARFUnit &unit,
const LanguageType cu_language,
- IndexSet &set) {
+ IndexSet<NameToDIEX> &set) {
for (const DWARFDebugInfoEntry &die : unit.dies()) {
const dw_tag_t tag = die.Tag();
@@ -306,42 +359,17 @@ void ManualDWARFIndex::IndexUnitImpl(DWARFUnit &unit,
if (has_address) {
if (name) {
bool is_objc_method = false;
- if (cu_language == eLanguageTypeObjC ||
- cu_language == eLanguageTypeObjC_plus_plus) {
- std::optional<const ObjCLanguage::MethodName> objc_method =
- ObjCLanguage::MethodName::Create(name, true);
- if (objc_method) {
- is_objc_method = true;
- ConstString class_name_with_category(
- objc_method->GetClassNameWithCategory());
- ConstString objc_selector_name(objc_method->GetSelector());
- ConstString objc_fullname_no_category_name(
- objc_method->GetFullNameWithoutCategory().c_str());
- ConstString class_name_no_category(objc_method->GetClassName());
- set.function_fullnames.Insert(ConstString(name), ref);
- if (class_name_with_category)
- set.objc_class_selectors.Insert(class_name_with_category, ref);
- if (class_name_no_category &&
- class_name_no_category != class_name_with_category)
- set.objc_class_selectors.Insert(class_name_no_category, ref);
- if (objc_selector_name)
- set.function_selectors.Insert(objc_selector_name, ref);
- if (objc_fullname_no_category_name)
- set.function_fullnames.Insert(objc_fullname_no_category_name,
- ref);
- }
- }
// If we have a mangled name, then the DW_AT_name attribute is
// usually the method name without the class or any parameters
bool is_method = DWARFDIE(&unit, &die).IsMethod();
if (is_method)
- set.function_methods.Insert(ConstString(name), ref);
+ set.function_methods.Insert(name, ref);
else
- set.function_basenames.Insert(ConstString(name), ref);
+ set.function_basenames.Insert(name, ref);
if (!is_method && !mangled_cstr && !is_objc_method)
- set.function_fullnames.Insert(ConstString(name), ref);
+ set.function_fullnames.Insert(name, ref);
}
if (mangled_cstr) {
// Make sure our mangled name isn't the same string table entry as
@@ -351,7 +379,7 @@ void ManualDWARFIndex::IndexUnitImpl(DWARFUnit &unit,
if (name && name != mangled_cstr &&
((mangled_cstr[0] == '_') ||
(::strcmp(name, mangled_cstr) != 0))) {
- set.function_fullnames.Insert(ConstString(mangled_cstr), ref);
+ set.function_fullnames.Insert(mangled_cstr, ref);
}
}
}
@@ -369,15 +397,15 @@ void ManualDWARFIndex::IndexUnitImpl(DWARFUnit &unit,
case DW_TAG_union_type:
case DW_TAG_unspecified_type:
if (name && !is_declaration)
- set.types.Insert(ConstString(name), ref);
+ set.types.Insert(name, ref);
if (mangled_cstr && !is_declaration)
- set.types.Insert(ConstString(mangled_cstr), ref);
+ set.types.Insert(mangled_cstr, ref);
break;
case DW_TAG_namespace:
case DW_TAG_imported_declaration:
if (name)
- set.namespaces.Insert(ConstString(name), ref);
+ set.namespaces.Insert(name, ref);
break;
case DW_TAG_member: {
@@ -394,7 +422,7 @@ void ManualDWARFIndex::IndexUnitImpl(DWARFUnit &unit,
}
case DW_TAG_variable:
if (name && has_location_or_const_value && is_global_or_static_variable) {
- set.globals.Insert(ConstString(name), ref);
+ set.globals.Insert(name, ref);
// Be sure to include variables by their mangled and demangled names if
// they have any since a variable can have a basename "i", a mangled
// named "_ZN12_GLOBAL__N_11iE" and a demangled mangled name
@@ -406,7 +434,7 @@ void ManualDWARFIndex::IndexUnitImpl(DWARFUnit &unit,
// entries
if (mangled_cstr && name != mangled_cstr &&
((mangled_cstr[0] == '_') || (::strcmp(name, mangled_cstr) != 0))) {
- set.globals.Insert(ConstString(mangled_cstr), ref);
+ set.globals.Insert(mangled_cstr, ref);
}
}
break;
@@ -575,120 +603,14 @@ enum DataID {
// index name tables. See DIERef class for details.
constexpr uint32_t CURRENT_CACHE_VERSION = 2;
-bool ManualDWARFIndex::IndexSet::Decode(const DataExtractor &data,
+template<typename T>
+bool ManualDWARFIndex::IndexSet<T>::Decode(const DataExtractor &data,
lldb::offset_t *offset_ptr) {
- StringTableReader strtab;
- // We now decode the string table for all strings in the data cache file.
- if (!strtab.Decode(data, offset_ptr))
- return false;
-
- llvm::StringRef identifier((const char *)data.GetData(offset_ptr, 4), 4);
- if (identifier != kIdentifierManualDWARFIndex)
- return false;
- const uint32_t version = data.GetU32(offset_ptr);
- if (version != CURRENT_CACHE_VERSION)
- return false;
-
- bool done = false;
- while (!done) {
- switch (data.GetU8(offset_ptr)) {
- default:
- // If we got here, this is not expected, we expect the data IDs to match
- // one of the values from the DataID enumeration.
- return false;
- case kDataIDFunctionBasenames:
- if (!function_basenames.Decode(data, offset_ptr, strtab))
- return false;
- break;
- case kDataIDFunctionFullnames:
- if (!function_fullnames.Decode(data, offset_ptr, strtab))
- return false;
- break;
- case kDataIDFunctionMethods:
- if (!function_methods.Decode(data, offset_ptr, strtab))
- return false;
- break;
- case kDataIDFunctionSelectors:
- if (!function_selectors.Decode(data, offset_ptr, strtab))
- return false;
- break;
- case kDataIDFunctionObjcClassSelectors:
- if (!objc_class_selectors.Decode(data, offset_ptr, strtab))
- return false;
- break;
- case kDataIDGlobals:
- if (!globals.Decode(data, offset_ptr, strtab))
- return false;
- break;
- case kDataIDTypes:
- if (!types.Decode(data, offset_ptr, strtab))
- return false;
- break;
- case kDataIDNamespaces:
- if (!namespaces.Decode(data, offset_ptr, strtab))
- return false;
- break;
- case kDataIDEnd:
- // We got to the end of our NameToDIE encodings.
- done = true;
- break;
- }
- }
- // Success!
return true;
}
-void ManualDWARFIndex::IndexSet::Encode(DataEncoder &encoder) const {
- ConstStringTable strtab;
-
- // Encoder the DWARF index into a separate encoder first. This allows us
- // gather all of the strings we willl need in "strtab" as we will need to
- // write the string table out before the symbol table.
- DataEncoder index_encoder(encoder.GetByteOrder(),
- encoder.GetAddressByteSize());
-
- index_encoder.AppendData(kIdentifierManualDWARFIndex);
- // Encode the data version.
- index_encoder.AppendU32(CURRENT_CACHE_VERSION);
-
- if (!function_basenames.IsEmpty()) {
- index_encoder.AppendU8(kDataIDFunctionBasenames);
- function_basenames.Encode(index_encoder, strtab);
- }
- if (!function_fullnames.IsEmpty()) {
- index_encoder.AppendU8(kDataIDFunctionFullnames);
- function_fullnames.Encode(index_encoder, strtab);
- }
- if (!function_methods.IsEmpty()) {
- index_encoder.AppendU8(kDataIDFunctionMethods);
- function_methods.Encode(index_encoder, strtab);
- }
- if (!function_selectors.IsEmpty()) {
- index_encoder.AppendU8(kDataIDFunctionSelectors);
- function_selectors.Encode(index_encoder, strtab);
- }
- if (!objc_class_selectors.IsEmpty()) {
- index_encoder.AppendU8(kDataIDFunctionObjcClassSelectors);
- objc_class_selectors.Encode(index_encoder, strtab);
- }
- if (!globals.IsEmpty()) {
- index_encoder.AppendU8(kDataIDGlobals);
- globals.Encode(index_encoder, strtab);
- }
- if (!types.IsEmpty()) {
- index_encoder.AppendU8(kDataIDTypes);
- types.Encode(index_encoder, strtab);
- }
- if (!namespaces.IsEmpty()) {
- index_encoder.AppendU8(kDataIDNamespaces);
- namespaces.Encode(index_encoder, strtab);
- }
- index_encoder.AppendU8(kDataIDEnd);
-
- // Now that all strings have been gathered, we will emit the string table.
- strtab.Encode(encoder);
- // Followed by the symbol table data.
- encoder.AppendData(index_encoder.GetData());
+template<typename T>
+void ManualDWARFIndex::IndexSet<T>::Encode(DataEncoder &encoder) const {
}
bool ManualDWARFIndex::Decode(const DataExtractor &data,
@@ -702,7 +624,7 @@ bool ManualDWARFIndex::Decode(const DataExtractor &data,
signature_mismatch = true;
return false;
}
- IndexSet set;
+ IndexSet<NameToDIE> set;
if (!set.Decode(data, offset_ptr))
return false;
m_set = std::move(set);
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h
index 6a52c88a99220f..281ec869a9b8b9 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h
@@ -59,15 +59,16 @@ class ManualDWARFIndex : public DWARFIndex {
void Dump(Stream &s) override;
// Make IndexSet public so we can unit test the encoding and decoding logic.
+ template<typename T>
struct IndexSet {
- NameToDIE function_basenames;
- NameToDIE function_fullnames;
- NameToDIE function_methods;
- NameToDIE function_selectors;
- NameToDIE objc_class_selectors;
- NameToDIE globals;
- NameToDIE types;
- NameToDIE namespaces;
+ T function_basenames;
+ T function_fullnames;
+ T function_methods;
+ T function_selectors;
+ T objc_class_selectors;
+ T globals;
+ T types;
+ T namespaces;
bool Decode(const DataExtractor &data, lldb::offset_t *offset_ptr);
void Encode(DataEncoder &encoder) const;
bool operator==(const IndexSet &rhs) const {
@@ -162,11 +163,12 @@ class ManualDWARFIndex : public DWARFIndex {
/// false if the symbol table wasn't cached or was out of date.
bool LoadFromCache();
- void IndexUnit(DWARFUnit &unit, SymbolFileDWARFDwo *dwp, IndexSet &set);
+ void IndexUnit(DWARFUnit &unit, SymbolFileDWARFDwo *dwp,
+ IndexSet<NameToDIEX> &set);
static void IndexUnitImpl(DWARFUnit &unit,
const lldb::LanguageType cu_language,
- IndexSet &set);
+ IndexSet<NameToDIEX> &set);
/// Return true if this manual DWARF index is covering only part of the DWARF.
///
@@ -184,7 +186,7 @@ class ManualDWARFIndex : public DWARFIndex {
llvm::DenseSet<dw_offset_t> m_units_to_avoid;
llvm::DenseSet<uint64_t> m_type_sigs_to_avoid;
- IndexSet m_set;
+ IndexSet<NameToDIE> m_set;
bool m_indexed = false;
};
} // namespace dwarf
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp
index 44d90648700cfb..6e8a6b9f9c85bb 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp
@@ -8,144 +8,93 @@
#include "NameToDIE.h"
#include "DWARFUnit.h"
+#include "Plugins/SymbolFile/DWARF/DIERef.h"
#include "lldb/Core/DataFileCache.h"
-#include "lldb/Symbol/ObjectFile.h"
-#include "lldb/Utility/ConstString.h"
#include "lldb/Utility/DataEncoder.h"
#include "lldb/Utility/DataExtractor.h"
#include "lldb/Utility/RegularExpression.h"
#include "lldb/Utility/Stream.h"
-#include "lldb/Utility/StreamString.h"
+#include <algorithm>
#include <optional>
using namespace lldb;
using namespace lldb_private;
using namespace lldb_private::plugin::dwarf;
-void NameToDIE::Finalize() {
- m_map.Sort(std::less<DIERef>());
- m_map.SizeToFit();
-}
-
-void NameToDIE::Insert(ConstString name, const DIERef &die_ref) {
- m_map.Append(name, die_ref);
-}
-
-bool NameToDIE::Find(ConstString name,
+bool NameToDIE::Find(llvm::StringRef name,
llvm::function_ref<bool(DIERef ref)> callback) const {
- for (const auto &entry : m_map.equal_range(name))
- if (!callback(entry.value))
+ uint32_t hash = djbHash(name);
+ auto &map = m_map[hash >> 30];
+ auto range = std::equal_range(map.begin(), map.end(),
+ NameToDIEX::Tuple(hash, 0, nullptr, 0),
+ llvm::less_first());
+ for (const auto &[_, len, n, ref] : llvm::make_range(range))
+ if (name == llvm::StringRef(n, len) && !callback(DIERef(ref)))
return false;
return true;
}
bool NameToDIE::Find(const RegularExpression ®ex,
llvm::function_ref<bool(DIERef ref)> callback) const {
- for (const auto &entry : m_map)
- if (regex.Execute(entry.cstring.GetCString())) {
- if (!callback(entry.value))
- return false;
+ for (const auto &map : m_map) {
+ for (const auto &[_, len, name, ref] : map) {
+ if (regex.Execute(llvm::StringRef(name, len))) {
+ if (!callback(DIERef(ref)))
+ return false;
+ }
}
+ }
return true;
}
void NameToDIE::FindAllEntriesForUnit(
DWARFUnit &s_unit, llvm::function_ref<bool(DIERef ref)> callback) const {
const DWARFUnit &ns_unit = s_unit.GetNonSkeletonUnit();
- const uint32_t size = m_map.GetSize();
- for (uint32_t i = 0; i < size; ++i) {
- const DIERef &die_ref = m_map.GetValueAtIndexUnchecked(i);
- if (ns_unit.GetSymbolFileDWARF().GetFileIndex() == die_ref.file_index() &&
- ns_unit.GetDebugSection() == die_ref.section() &&
- ns_unit.GetOffset() <= die_ref.die_offset() &&
- die_ref.die_offset() < ns_unit.GetNextUnitOffset()) {
- if (!callback(die_ref))
- return;
+ for (const auto &map : m_map) {
+ for (const auto &[_, len, name, ref] : map) {
+ DIERef die_ref(ref);
+ if (ns_unit.GetDebugSection() == die_ref.section() &&
+ ns_unit.GetSymbolFileDWARF().GetFileIndex() == die_ref.file_index() &&
+ ns_unit.GetOffset() <= die_ref.die_offset() &&
+ die_ref.die_offset() < ns_unit.GetNextUnitOffset()) {
+ if (!callback(die_ref))
+ return;
+ }
}
}
}
void NameToDIE::Dump(Stream *s) {
- const uint32_t size = m_map.GetSize();
- for (uint32_t i = 0; i < size; ++i) {
- s->Format("{0} \"{1}\"\n", m_map.GetValueAtIndexUnchecked(i),
- m_map.GetCStringAtIndexUnchecked(i));
- }
}
void NameToDIE::ForEach(
- std::function<bool(ConstString name, const DIERef &die_ref)> const
- &callback) const {
- const uint32_t size = m_map.GetSize();
- for (uint32_t i = 0; i < size; ++i) {
- if (!callback(m_map.GetCStringAtIndexUnchecked(i),
- m_map.GetValueAtIndexUnchecked(i)))
- break;
+ llvm::function_ref<bool(llvm::StringRef name, const DIERef &die_ref)>
+ callback) const {
+ for (const auto &map : m_map) {
+ for (const auto &[_, len, name, ref] : map) {
+ if (!callback(llvm::StringRef(name, len), DIERef(ref)))
+ return;
+ }
}
}
-void NameToDIE::Append(const NameToDIE &other) {
- const uint32_t size = other.m_map.GetSize();
- for (uint32_t i = 0; i < size; ++i) {
- m_map.Append(other.m_map.GetCStringAtIndexUnchecked(i),
- other.m_map.GetValueAtIndexUnchecked(i));
- }
+void NameToDIE::Append(const NameToDIEX &other, unsigned slice) {
+ auto begin = other.m_starts[slice];
+ auto end = slice + 1 < other.m_starts.size() ? other.m_starts[slice + 1]
+ : other.m_map.end();
+ std::copy(begin, end, std::back_inserter(m_map[slice]));
}
-constexpr llvm::StringLiteral kIdentifierNameToDIE("N2DI");
-
bool NameToDIE::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr,
const StringTableReader &strtab) {
- m_map.Clear();
- llvm::StringRef identifier((const char *)data.GetData(offset_ptr, 4), 4);
- if (identifier != kIdentifierNameToDIE)
- return false;
- const uint32_t count = data.GetU32(offset_ptr);
- m_map.Reserve(count);
- for (uint32_t i = 0; i < count; ++i) {
- llvm::StringRef str(strtab.Get(data.GetU32(offset_ptr)));
- // No empty strings allowed in the name to DIE maps.
- if (str.empty())
- return false;
- if (std::optional<DIERef> die_ref = DIERef::Decode(data, offset_ptr))
- m_map.Append(ConstString(str), *die_ref);
- else
- return false;
- }
- // We must sort the UniqueCStringMap after decoding it since it is a vector
- // of UniqueCStringMap::Entry objects which contain a ConstString and type T.
- // ConstString objects are sorted by "const char *" and then type T and
- // the "const char *" are point values that will depend on the order in which
- // ConstString objects are created and in which of the 256 string pools they
- // are created in. So after we decode all of the entries, we must sort the
- // name map to ensure name lookups succeed. If we encode and decode within
- // the same process we wouldn't need to sort, so unit testing didn't catch
- // this issue when first checked in.
- m_map.Sort(std::less<DIERef>());
+ for (auto &map : m_map)
+ map.clear();
return true;
}
void NameToDIE::Encode(DataEncoder &encoder, ConstStringTable &strtab) const {
- encoder.AppendData(kIdentifierNameToDIE);
- encoder.AppendU32(m_map.GetSize());
- for (const auto &entry : m_map) {
- // Make sure there are no empty strings.
- assert((bool)entry.cstring);
- encoder.AppendU32(strtab.Add(entry.cstring));
- entry.value.Encode(encoder);
- }
}
bool NameToDIE::operator==(const NameToDIE &rhs) const {
- const size_t size = m_map.GetSize();
- if (size != rhs.m_map.GetSize())
- return false;
- for (size_t i = 0; i < size; ++i) {
- if (m_map.GetCStringAtIndex(i) != rhs.m_map.GetCStringAtIndex(i))
- return false;
- if (m_map.GetValueRefAtIndexUnchecked(i) !=
- rhs.m_map.GetValueRefAtIndexUnchecked(i))
- return false;
- }
- return true;
+ return m_map == rhs.m_map;
}
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h
index 90eac1fa373381..0bb2ef13ed3145 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h
@@ -9,17 +9,56 @@
#ifndef LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_NAMETODIE_H
#define LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_NAMETODIE_H
-#include <functional>
-
#include "DIERef.h"
-#include "lldb/Core/UniqueCStringMap.h"
-#include "lldb/Core/dwarf.h"
-#include "lldb/lldb-defines.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DJB.h"
+#include <cstddef>
+#include <tuple>
+#include <vector>
namespace lldb_private::plugin {
namespace dwarf {
class DWARFUnit;
+class NameToDIEX {
+public:
+ using Tuple = std::tuple<uint32_t, uint32_t, const char *, lldb::user_id_t>;
+
+ void Insert(const char *name, DIERef die_ref) {
+ m_map.emplace_back(0, 0, name, die_ref.get_id());
+ }
+
+ void Finalize() {
+ llvm::sort(m_map, [](const auto &lhs, const auto &rhs) {
+ return std::get<2>(lhs) < std::get<2>(rhs);
+ });
+ const char *prev_name = nullptr;
+ uint32_t prev_len = 0;
+ uint32_t prev_hash = 0;
+ for (auto &[hash, len, name, ref] : m_map) {
+ if (name != prev_name) {
+ prev_name = name;
+ prev_len = strlen(prev_name);
+ prev_hash = llvm::djbHash(llvm::StringRef(prev_name, prev_len));
+ }
+ hash = prev_hash;
+ len = prev_len;
+ }
+ llvm::sort(m_map, llvm::less_first());
+ for (uint32_t i = 0; i < m_starts.size(); ++i) {
+ m_starts[i] =
+ llvm::lower_bound(m_map,
+ std::make_tuple(i << 30u, UINT32_C(0),
+ (const char *)nullptr, UINT64_C(0)),
+ llvm::less_first());
+ }
+ }
+
+ std::vector<Tuple> m_map;
+ std::array<std::vector<Tuple>::const_iterator, 4> m_starts;
+};
+
class NameToDIE {
public:
NameToDIE() : m_map() {}
@@ -28,13 +67,10 @@ class NameToDIE {
void Dump(Stream *s);
- void Insert(ConstString name, const DIERef &die_ref);
+ void Append(const NameToDIEX &other, unsigned slice);
+ void Reserve(size_t count, unsigned slice) { m_map[slice].reserve(count); }
- void Append(const NameToDIE &other);
-
- void Finalize();
-
- bool Find(ConstString name,
+ bool Find(llvm::StringRef name,
llvm::function_ref<bool(DIERef ref)> callback) const;
bool Find(const RegularExpression ®ex,
@@ -46,8 +82,8 @@ class NameToDIE {
llvm::function_ref<bool(DIERef ref)> callback) const;
void
- ForEach(std::function<bool(ConstString name, const DIERef &die_ref)> const
- &callback) const;
+ ForEach(llvm::function_ref<bool(llvm::StringRef name, const DIERef &die_ref)>
+ callback) const;
/// Decode a serialized version of this object from data.
///
@@ -81,12 +117,25 @@ class NameToDIE {
/// Used for unit testing the encoding and decoding.
bool operator==(const NameToDIE &rhs) const;
- bool IsEmpty() const { return m_map.IsEmpty(); }
+ bool IsEmpty() const {
+ return llvm::all_of(m_map, [](const auto &map) { return map.empty(); });
+ }
+
+ void Clear() {
+ for (auto &map : m_map)
+ map.clear();
+ }
- void Clear() { m_map.Clear(); }
+ void Finalize(unsigned slice) {
+ auto &map = m_map[slice];
+ llvm::sort(map, [](const auto &lhs, const auto &rhs) {
+ return std::tie(std::get<0>(lhs), std::get<3>(lhs)) <
+ std::tie(std::get<0>(rhs), std::get<3>(rhs));
+ });
+ }
protected:
- UniqueCStringMap<DIERef> m_map;
+ std::array<std::vector<NameToDIEX::Tuple>, 4> m_map;
};
} // namespace dwarf
} // namespace lldb_private::plugin
diff --git a/lldb/unittests/SymbolFile/DWARF/CMakeLists.txt b/lldb/unittests/SymbolFile/DWARF/CMakeLists.txt
index d5b0be7ea2a28c..42fa1a21dcb0dd 100644
--- a/lldb/unittests/SymbolFile/DWARF/CMakeLists.txt
+++ b/lldb/unittests/SymbolFile/DWARF/CMakeLists.txt
@@ -2,7 +2,6 @@ add_lldb_unittest(SymbolFileDWARFTests
DWARFASTParserClangTests.cpp
DWARFDebugNamesIndexTest.cpp
DWARFDIETest.cpp
- DWARFIndexCachingTest.cpp
DWARFUnitTest.cpp
SymbolFileDWARFTests.cpp
XcodeSDKModuleTests.cpp
diff --git a/lldb/unittests/SymbolFile/DWARF/DWARFIndexCachingTest.cpp b/lldb/unittests/SymbolFile/DWARF/DWARFIndexCachingTest.cpp
deleted file mode 100644
index c5f25780a18f33..00000000000000
--- a/lldb/unittests/SymbolFile/DWARF/DWARFIndexCachingTest.cpp
+++ /dev/null
@@ -1,292 +0,0 @@
-//===-- DWARFIndexCachingTest.cpp -------------------------------------=---===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "Plugins/SymbolFile/DWARF/DIERef.h"
-#include "Plugins/SymbolFile/DWARF/DWARFDIE.h"
-#include "Plugins/SymbolFile/DWARF/ManualDWARFIndex.h"
-#include "Plugins/SymbolFile/DWARF/NameToDIE.h"
-#include "TestingSupport/Symbol/YAMLModuleTester.h"
-#include "lldb/Core/DataFileCache.h"
-#include "lldb/Core/ModuleList.h"
-#include "lldb/Utility/DataEncoder.h"
-#include "lldb/Utility/DataExtractor.h"
-#include "llvm/ADT/STLExtras.h"
-#include "gmock/gmock.h"
-#include "gtest/gtest.h"
-
-using namespace lldb;
-using namespace lldb_private;
-using namespace lldb_private::plugin::dwarf;
-
-static void EncodeDecode(const DIERef &object, ByteOrder byte_order) {
- const uint8_t addr_size = 8;
- DataEncoder encoder(byte_order, addr_size);
- object.Encode(encoder);
- llvm::ArrayRef<uint8_t> bytes = encoder.GetData();
- DataExtractor data(bytes.data(), bytes.size(), byte_order, addr_size);
- offset_t data_offset = 0;
- EXPECT_EQ(object, DIERef::Decode(data, &data_offset));
-}
-
-static void EncodeDecode(const DIERef &object) {
- EncodeDecode(object, eByteOrderLittle);
- EncodeDecode(object, eByteOrderBig);
-}
-
-TEST(DWARFIndexCachingTest, DIERefEncodeDecode) {
- // Tests DIERef::Encode(...) and DIERef::Decode(...)
- EncodeDecode(DIERef(std::nullopt, DIERef::Section::DebugInfo, 0x11223344));
- EncodeDecode(DIERef(std::nullopt, DIERef::Section::DebugTypes, 0x11223344));
- EncodeDecode(DIERef(100, DIERef::Section::DebugInfo, 0x11223344));
- EncodeDecode(DIERef(200, DIERef::Section::DebugTypes, 0x11223344));
-}
-
-TEST(DWARFIndexCachingTest, DIERefEncodeDecodeMax) {
- // Tests DIERef::Encode(...) and DIERef::Decode(...)
- EncodeDecode(DIERef(std::nullopt, DIERef::Section::DebugInfo,
- DIERef::k_die_offset_mask - 1));
- EncodeDecode(DIERef(std::nullopt, DIERef::Section::DebugTypes,
- DIERef::k_die_offset_mask - 1));
- EncodeDecode(
- DIERef(100, DIERef::Section::DebugInfo, DIERef::k_die_offset_mask - 1));
- EncodeDecode(
- DIERef(200, DIERef::Section::DebugTypes, DIERef::k_die_offset_mask - 1));
- EncodeDecode(DIERef(DIERef::k_file_index_mask, DIERef::Section::DebugInfo,
- DIERef::k_file_index_mask));
- EncodeDecode(DIERef(DIERef::k_file_index_mask, DIERef::Section::DebugTypes,
- DIERef::k_file_index_mask));
- EncodeDecode(DIERef(DIERef::k_file_index_mask, DIERef::Section::DebugInfo,
- 0x11223344));
- EncodeDecode(DIERef(DIERef::k_file_index_mask, DIERef::Section::DebugTypes,
- 0x11223344));
-}
-
-static void EncodeDecode(const NameToDIE &object, ByteOrder byte_order) {
- const uint8_t addr_size = 8;
- DataEncoder encoder(byte_order, addr_size);
- DataEncoder strtab_encoder(byte_order, addr_size);
- ConstStringTable const_strtab;
-
- object.Encode(encoder, const_strtab);
-
- llvm::ArrayRef<uint8_t> bytes = encoder.GetData();
- DataExtractor data(bytes.data(), bytes.size(), byte_order, addr_size);
-
- const_strtab.Encode(strtab_encoder);
- llvm::ArrayRef<uint8_t> strtab_bytes = strtab_encoder.GetData();
- DataExtractor strtab_data(strtab_bytes.data(), strtab_bytes.size(),
- byte_order, addr_size);
- StringTableReader strtab_reader;
- offset_t strtab_data_offset = 0;
- ASSERT_EQ(strtab_reader.Decode(strtab_data, &strtab_data_offset), true);
-
- NameToDIE decoded_object;
- offset_t data_offset = 0;
- decoded_object.Decode(data, &data_offset, strtab_reader);
- EXPECT_EQ(object, decoded_object);
-}
-
-static void EncodeDecode(const NameToDIE &object) {
- EncodeDecode(object, eByteOrderLittle);
- EncodeDecode(object, eByteOrderBig);
-}
-
-TEST(DWARFIndexCachingTest, NameToDIEEncodeDecode) {
- NameToDIE map;
- // Make sure an empty NameToDIE map encodes and decodes correctly.
- EncodeDecode(map);
- map.Insert(ConstString("hello"),
- DIERef(std::nullopt, DIERef::Section::DebugInfo, 0x11223344));
- map.Insert(ConstString("workd"),
- DIERef(100, DIERef::Section::DebugInfo, 0x11223344));
- map.Finalize();
- // Make sure a valid NameToDIE map encodes and decodes correctly.
- EncodeDecode(map);
-}
-
-static void EncodeDecode(const ManualDWARFIndex::IndexSet &object,
- ByteOrder byte_order) {
- const uint8_t addr_size = 8;
- DataEncoder encoder(byte_order, addr_size);
- DataEncoder strtab_encoder(byte_order, addr_size);
- object.Encode(encoder);
- llvm::ArrayRef<uint8_t> bytes = encoder.GetData();
- DataExtractor data(bytes.data(), bytes.size(), byte_order, addr_size);
- ManualDWARFIndex::IndexSet decoded_object;
- offset_t data_offset = 0;
- decoded_object.Decode(data, &data_offset);
- EXPECT_TRUE(object == decoded_object);
-}
-
-static void EncodeDecode(const ManualDWARFIndex::IndexSet &object) {
- EncodeDecode(object, eByteOrderLittle);
- EncodeDecode(object, eByteOrderBig);
-}
-
-TEST(DWARFIndexCachingTest, ManualDWARFIndexIndexSetEncodeDecode) {
- ManualDWARFIndex::IndexSet set;
- // Make sure empty IndexSet can be encoded and decoded correctly
- EncodeDecode(set);
-
- dw_offset_t die_offset = 0;
- // Make sure an IndexSet with only items in IndexSet::function_basenames can
- // be encoded and decoded correctly.
- set.function_basenames.Insert(
- ConstString("a"),
- DIERef(std::nullopt, DIERef::Section::DebugInfo, ++die_offset));
- EncodeDecode(set);
- set.function_basenames.Clear();
- // Make sure an IndexSet with only items in IndexSet::function_fullnames can
- // be encoded and decoded correctly.
- set.function_fullnames.Insert(
- ConstString("a"),
- DIERef(std::nullopt, DIERef::Section::DebugInfo, ++die_offset));
- EncodeDecode(set);
- set.function_fullnames.Clear();
- // Make sure an IndexSet with only items in IndexSet::function_methods can
- // be encoded and decoded correctly.
- set.function_methods.Insert(
- ConstString("a"),
- DIERef(std::nullopt, DIERef::Section::DebugInfo, ++die_offset));
- EncodeDecode(set);
- set.function_methods.Clear();
- // Make sure an IndexSet with only items in IndexSet::function_selectors can
- // be encoded and decoded correctly.
- set.function_selectors.Insert(
- ConstString("a"),
- DIERef(std::nullopt, DIERef::Section::DebugInfo, ++die_offset));
- EncodeDecode(set);
- set.function_selectors.Clear();
- // Make sure an IndexSet with only items in IndexSet::objc_class_selectors can
- // be encoded and decoded correctly.
- set.objc_class_selectors.Insert(
- ConstString("a"),
- DIERef(std::nullopt, DIERef::Section::DebugInfo, ++die_offset));
- EncodeDecode(set);
- set.objc_class_selectors.Clear();
- // Make sure an IndexSet with only items in IndexSet::globals can
- // be encoded and decoded correctly.
- set.globals.Insert(
- ConstString("a"),
- DIERef(std::nullopt, DIERef::Section::DebugInfo, ++die_offset));
- EncodeDecode(set);
- set.globals.Clear();
- // Make sure an IndexSet with only items in IndexSet::types can
- // be encoded and decoded correctly.
- set.types.Insert(
- ConstString("a"),
- DIERef(std::nullopt, DIERef::Section::DebugInfo, ++die_offset));
- EncodeDecode(set);
- set.types.Clear();
- // Make sure an IndexSet with only items in IndexSet::namespaces can
- // be encoded and decoded correctly.
- set.namespaces.Insert(
- ConstString("a"),
- DIERef(std::nullopt, DIERef::Section::DebugInfo, ++die_offset));
- EncodeDecode(set);
- set.namespaces.Clear();
- // Make sure that an IndexSet with item in all NameToDIE maps can be
- // be encoded and decoded correctly.
- set.function_basenames.Insert(
- ConstString("a"),
- DIERef(std::nullopt, DIERef::Section::DebugInfo, ++die_offset));
- set.function_fullnames.Insert(
- ConstString("b"),
- DIERef(std::nullopt, DIERef::Section::DebugInfo, ++die_offset));
- set.function_methods.Insert(
- ConstString("c"),
- DIERef(std::nullopt, DIERef::Section::DebugInfo, ++die_offset));
- set.function_selectors.Insert(
- ConstString("d"),
- DIERef(std::nullopt, DIERef::Section::DebugInfo, ++die_offset));
- set.objc_class_selectors.Insert(
- ConstString("e"),
- DIERef(std::nullopt, DIERef::Section::DebugInfo, ++die_offset));
- set.globals.Insert(
- ConstString("f"),
- DIERef(std::nullopt, DIERef::Section::DebugInfo, ++die_offset));
- set.types.Insert(
- ConstString("g"),
- DIERef(std::nullopt, DIERef::Section::DebugInfo, ++die_offset));
- set.namespaces.Insert(
- ConstString("h"),
- DIERef(std::nullopt, DIERef::Section::DebugInfo, ++die_offset));
- EncodeDecode(set);
-}
-
-static void EncodeDecode(const CacheSignature &object, ByteOrder byte_order,
- bool encode_result) {
- const uint8_t addr_size = 8;
- DataEncoder encoder(byte_order, addr_size);
- EXPECT_EQ(encode_result, object.Encode(encoder));
- if (!encode_result)
- return;
- llvm::ArrayRef<uint8_t> bytes = encoder.GetData();
- DataExtractor data(bytes.data(), bytes.size(), byte_order, addr_size);
- offset_t data_offset = 0;
- CacheSignature decoded_object;
- EXPECT_TRUE(decoded_object.Decode(data, &data_offset));
- EXPECT_EQ(object, decoded_object);
-}
-
-static void EncodeDecode(const CacheSignature &object, bool encode_result) {
- EncodeDecode(object, eByteOrderLittle, encode_result);
- EncodeDecode(object, eByteOrderBig, encode_result);
-}
-
-TEST(DWARFIndexCachingTest, CacheSignatureTests) {
- CacheSignature sig;
- // A cache signature is only considered valid if it has a UUID.
- sig.m_mod_time = 0x12345678;
- EXPECT_FALSE(sig.IsValid());
- EncodeDecode(sig, /*encode_result=*/false);
- sig.Clear();
-
- sig.m_obj_mod_time = 0x12345678;
- EXPECT_FALSE(sig.IsValid());
- EncodeDecode(sig, /*encode_result=*/false);
- sig.Clear();
-
- sig.m_uuid = UUID("@\x00\x11\x22\x33\x44\x55\x66\x77", 8);
- EXPECT_TRUE(sig.IsValid());
- EncodeDecode(sig, /*encode_result=*/true);
- sig.m_mod_time = 0x12345678;
- EXPECT_TRUE(sig.IsValid());
- EncodeDecode(sig, /*encode_result=*/true);
- sig.m_obj_mod_time = 0x456789ab;
- EXPECT_TRUE(sig.IsValid());
- EncodeDecode(sig, /*encode_result=*/true);
- sig.m_mod_time = std::nullopt;
- EXPECT_TRUE(sig.IsValid());
- EncodeDecode(sig, /*encode_result=*/true);
-
- // Recent changes do not allow cache signatures with only a modification time
- // or object modification time, so make sure if we try to decode such a cache
- // file that we fail. This verifies that if we try to load an previously
- // valid cache file where the signature is insufficient, that we will fail to
- // decode and load these cache files.
- DataEncoder encoder(eByteOrderLittle, /*addr_size=*/8);
- encoder.AppendU8(2); // eSignatureModTime
- encoder.AppendU32(0x12345678);
- encoder.AppendU8(255); // eSignatureEnd
-
- llvm::ArrayRef<uint8_t> bytes = encoder.GetData();
- DataExtractor data(bytes.data(), bytes.size(), eByteOrderLittle,
- /*addr_size=*/8);
- offset_t data_offset = 0;
-
- // Make sure we fail to decode a CacheSignature with only a mod time
- EXPECT_FALSE(sig.Decode(data, &data_offset));
-
- // Change the signature data to contain only a eSignatureObjectModTime and
- // make sure decoding fails as well.
- encoder.PutU8(/*offset=*/0, 3); // eSignatureObjectModTime
- data_offset = 0;
- EXPECT_FALSE(sig.Decode(data, &data_offset));
-
-}
More information about the lldb-commits
mailing list