[Lldb-commits] [lldb] [lldb] Add LineTable::{upper, lower}_bound (PR #127519)

Pavel Labath via lldb-commits lldb-commits at lists.llvm.org
Tue Feb 18 02:24:43 PST 2025


https://github.com/labath updated https://github.com/llvm/llvm-project/pull/127519

>From 627bb23342ee22dd263f0142ea43f644afd6faae Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel at labath.sk>
Date: Mon, 17 Feb 2025 17:29:40 +0100
Subject: [PATCH 1/2] [lldb] Add LineTable::{upper,lower}_bound

The motivation is #123622 and the fact that it is hard to find the last
line entry in a given range. `FindLineEntryByAddress(range_end-1)` is
the best we have, but it's not ideal because it has a magic -1 and
it relies on there existing a line entry at that address (generally, it
should be there, but if for some reason it isn't, we might end up ignoring
the entries that are there (or -- like my incorrect fix in #123622 did
-- iterating through the entire line table)).

What we really want is to get the last entry that exists in the given
range. Or, equivalently (and more STL-like) the first entry after that
range. This is what these functions do. I've used the STL names since
they do pretty much exactly what the standard functions do (the main
head-scratcher comes from the fact that our entries represent ranges
rather than single values).

The functions can also be used to simplify the maze of `if` statements
in `FindLineEntryByAddress`, but I'm keeping that as a separate patch.
For now, I'm just adding some unit testing for that function to gain
more confidence that the patch does not change the function behavior.
---
 lldb/include/lldb/Symbol/LineTable.h    |  13 ++
 lldb/source/Symbol/LineTable.cpp        |  46 +++-
 lldb/unittests/Symbol/CMakeLists.txt    |   1 +
 lldb/unittests/Symbol/LineTableTest.cpp | 285 ++++++++++++++++++++++++
 4 files changed, 343 insertions(+), 2 deletions(-)
 create mode 100644 lldb/unittests/Symbol/LineTableTest.cpp

diff --git a/lldb/include/lldb/Symbol/LineTable.h b/lldb/include/lldb/Symbol/LineTable.h
index 6d158ab518879..f66081b6ee110 100644
--- a/lldb/include/lldb/Symbol/LineTable.h
+++ b/lldb/include/lldb/Symbol/LineTable.h
@@ -102,6 +102,19 @@ class LineTable {
 
   void GetDescription(Stream *s, Target *target, lldb::DescriptionLevel level);
 
+  /// Helper function for line table iteration. \c lower_bound returns the index
+  /// of the first line entry which ends after the given address (i.e., the
+  /// first entry which contains the given address or comes after it).
+  /// \c upper_bound returns the index of the first line entry which begins on
+  /// or after the given address (i.e., the entry which would come after the
+  /// entry containing the given address, if such an entry exists). Functions
+  /// return <tt>GetSize()</tt> if there is no such entry. The functions are
+  /// most useful in combination: iterating from <tt>lower_bound(a)</tt> to
+  /// <tt>upper_bound(b)</tt> returns all line entries which intersect the
+  /// half-open range <tt>[a,b)</tt>.
+  uint32_t lower_bound(const Address &so_addr) const;
+  uint32_t upper_bound(const Address &so_addr) const;
+
   /// Find a line entry that contains the section offset address \a so_addr.
   ///
   /// \param[in] so_addr
diff --git a/lldb/source/Symbol/LineTable.cpp b/lldb/source/Symbol/LineTable.cpp
index 3d2afcdd11997..aae4ab59ff156 100644
--- a/lldb/source/Symbol/LineTable.cpp
+++ b/lldb/source/Symbol/LineTable.cpp
@@ -123,7 +123,7 @@ void LineTable::InsertSequence(LineSequence *sequence) {
   entry_collection::iterator end_pos = m_entries.end();
   LineTable::Entry::LessThanBinaryPredicate less_than_bp(this);
   entry_collection::iterator pos =
-      upper_bound(begin_pos, end_pos, entry, less_than_bp);
+      std::upper_bound(begin_pos, end_pos, entry, less_than_bp);
 
   // We should never insert a sequence in the middle of another sequence
   if (pos != begin_pos) {
@@ -185,6 +185,48 @@ bool LineTable::GetLineEntryAtIndex(uint32_t idx, LineEntry &line_entry) {
   return false;
 }
 
+uint32_t LineTable::lower_bound(const Address &so_addr) const {
+  if (so_addr.GetModule() != m_comp_unit->GetModule())
+    return GetSize();
+
+  Entry search_entry;
+  search_entry.file_addr = so_addr.GetFileAddress();
+  if (search_entry.file_addr == LLDB_INVALID_ADDRESS)
+    return GetSize();
+
+  // This is not a typo. upper_bound returns the first entry which definitely
+  // does not contain this address, which means the entry before it *might*
+  // contain it -- if it is not a termination entry.
+  auto pos =
+      llvm::upper_bound(m_entries, search_entry, Entry::EntryAddressLessThan);
+
+  if (pos != m_entries.begin() && !std::prev(pos)->is_terminal_entry)
+    --pos;
+
+  return std::distance(m_entries.begin(), pos);
+}
+
+uint32_t LineTable::upper_bound(const Address &so_addr) const {
+  if (so_addr.GetModule() != m_comp_unit->GetModule())
+    return GetSize();
+
+  Entry search_entry;
+  search_entry.file_addr = so_addr.GetFileAddress();
+  if (search_entry.file_addr == LLDB_INVALID_ADDRESS)
+    return GetSize();
+
+  // This is not a typo. lower_bound returns the first entry which starts on or
+  // after the given address, which is exactly what we want -- *except* if the
+  // entry is a termination entry (in that case, we want the one after it).
+  auto pos =
+      llvm::lower_bound(m_entries, search_entry, Entry::EntryAddressLessThan);
+  if (pos != m_entries.end() && pos->file_addr == search_entry.file_addr &&
+      pos->is_terminal_entry)
+    ++pos;
+
+  return std::distance(m_entries.begin(), pos);
+}
+
 bool LineTable::FindLineEntryByAddress(const Address &so_addr,
                                        LineEntry &line_entry,
                                        uint32_t *index_ptr) {
@@ -199,7 +241,7 @@ bool LineTable::FindLineEntryByAddress(const Address &so_addr,
     if (search_entry.file_addr != LLDB_INVALID_ADDRESS) {
       entry_collection::const_iterator begin_pos = m_entries.begin();
       entry_collection::const_iterator end_pos = m_entries.end();
-      entry_collection::const_iterator pos = lower_bound(
+      entry_collection::const_iterator pos = std::lower_bound(
           begin_pos, end_pos, search_entry, Entry::EntryAddressLessThan);
       if (pos != end_pos) {
         if (pos != begin_pos) {
diff --git a/lldb/unittests/Symbol/CMakeLists.txt b/lldb/unittests/Symbol/CMakeLists.txt
index e1d24357e33db..ab5cecd101833 100644
--- a/lldb/unittests/Symbol/CMakeLists.txt
+++ b/lldb/unittests/Symbol/CMakeLists.txt
@@ -1,5 +1,6 @@
 add_lldb_unittest(SymbolTests
   JSONSymbolTest.cpp
+  LineTableTest.cpp
   LocateSymbolFileTest.cpp
   MangledTest.cpp
   PostfixExpressionTest.cpp
diff --git a/lldb/unittests/Symbol/LineTableTest.cpp b/lldb/unittests/Symbol/LineTableTest.cpp
new file mode 100644
index 0000000000000..50890d0ed795c
--- /dev/null
+++ b/lldb/unittests/Symbol/LineTableTest.cpp
@@ -0,0 +1,285 @@
+//===-- LineTableTest.cpp -------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Plugins/ObjectFile/ELF/ObjectFileELF.h"
+#include "TestingSupport/SubsystemRAII.h"
+#include "TestingSupport/TestUtilities.h"
+#include "lldb/Core/PluginManager.h"
+#include "lldb/Symbol/CompileUnit.h"
+#include "lldb/Symbol/SymbolFile.h"
+#include "gtest/gtest.h"
+#include <memory>
+
+using namespace lldb;
+using namespace llvm;
+using namespace lldb_private;
+
+namespace {
+
+// A fake symbol file class to allow us to create the line table "the right
+// way". Pretty much all methods except for GetCompileUnitAtIndex and
+// GetNumCompileUnits are stubbed out.
+class FakeSymbolFile : public SymbolFile {
+public:
+  /// LLVM RTTI support.
+  /// \{
+  bool isA(const void *ClassID) const override {
+    return ClassID == &ID || SymbolFile::isA(ClassID);
+  }
+  static bool classof(const SymbolFile *obj) { return obj->isA(&ID); }
+  /// \}
+
+  static void Initialize() {
+    PluginManager::RegisterPlugin("FakeSymbolFile", "", CreateInstance,
+                                  DebuggerInitialize);
+  }
+  static void Terminate() { PluginManager::UnregisterPlugin(CreateInstance); }
+
+  void InjectCompileUnit(std::unique_ptr<CompileUnit> cu_up) {
+    m_cu_sp = std::move(cu_up);
+  }
+
+private:
+  /// LLVM RTTI support.
+  static char ID;
+
+  static SymbolFile *CreateInstance(ObjectFileSP objfile_sp) {
+    return new FakeSymbolFile(std::move(objfile_sp));
+  }
+  static void DebuggerInitialize(Debugger &) {}
+
+  StringRef GetPluginName() override { return "FakeSymbolFile"; }
+  uint32_t GetAbilities() override { return UINT32_MAX; }
+  uint32_t CalculateAbilities() override { return UINT32_MAX; }
+  uint32_t GetNumCompileUnits() override { return 1; }
+  CompUnitSP GetCompileUnitAtIndex(uint32_t) override { return m_cu_sp; }
+  Symtab *GetSymtab() override { return nullptr; }
+  LanguageType ParseLanguage(CompileUnit &) override { return eLanguageTypeC; }
+  size_t ParseFunctions(CompileUnit &) override { return 0; }
+  bool ParseLineTable(CompileUnit &) override { return true; }
+  bool ParseDebugMacros(CompileUnit &) override { return true; }
+  bool ParseSupportFiles(CompileUnit &, SupportFileList &) override {
+    return true;
+  }
+  size_t ParseTypes(CompileUnit &) override { return 0; }
+  bool ParseImportedModules(const SymbolContext &,
+                            std::vector<SourceModule> &) override {
+    return false;
+  }
+  size_t ParseBlocksRecursive(Function &) override { return 0; }
+  size_t ParseVariablesForContext(const SymbolContext &) override { return 0; }
+  Type *ResolveTypeUID(user_id_t) override { return nullptr; }
+  std::optional<ArrayInfo>
+  GetDynamicArrayInfoForUID(user_id_t, const ExecutionContext *) override {
+    return std::nullopt;
+  }
+  bool CompleteType(CompilerType &) override { return true; }
+  uint32_t ResolveSymbolContext(const Address &, SymbolContextItem,
+                                SymbolContext &) override {
+    return 0;
+  }
+  void GetTypes(SymbolContextScope *, TypeClass, TypeList &) override {}
+  Expected<TypeSystemSP> GetTypeSystemForLanguage(LanguageType) override {
+    return createStringError(std::errc::not_supported, "");
+  }
+  const ObjectFile *GetObjectFile() const override {
+    return m_objfile_sp.get();
+  }
+  ObjectFile *GetObjectFile() override { return m_objfile_sp.get(); }
+  ObjectFile *GetMainObjectFile() override { return m_objfile_sp.get(); }
+  void SectionFileAddressesChanged() override {}
+  void Dump(Stream &) override {}
+  uint64_t GetDebugInfoSize(bool) override { return 0; }
+  bool GetDebugInfoIndexWasLoadedFromCache() const override { return false; }
+  void SetDebugInfoIndexWasLoadedFromCache() override {}
+  bool GetDebugInfoIndexWasSavedToCache() const override { return false; }
+  void SetDebugInfoIndexWasSavedToCache() override {}
+  bool GetDebugInfoHadFrameVariableErrors() const override { return false; }
+  void SetDebugInfoHadFrameVariableErrors() override {}
+  TypeSP MakeType(user_id_t, ConstString, std::optional<uint64_t>,
+                  SymbolContextScope *, user_id_t, Type::EncodingDataType,
+                  const Declaration &, const CompilerType &, Type::ResolveState,
+                  uint32_t) override {
+    return nullptr;
+  }
+  TypeSP CopyType(const TypeSP &) override { return nullptr; }
+
+  FakeSymbolFile(ObjectFileSP objfile_sp)
+      : m_objfile_sp(std::move(objfile_sp)) {}
+
+  ObjectFileSP m_objfile_sp;
+  CompUnitSP m_cu_sp;
+};
+
+struct FakeModuleFixture {
+  TestFile file;
+  ModuleSP module_sp;
+  SectionSP text_sp;
+  LineTable *line_table;
+};
+
+class LineTableTest : public testing::Test {
+  SubsystemRAII<ObjectFileELF, FakeSymbolFile> subsystems;
+};
+
+class LineSequenceBuilder {
+public:
+  std::vector<std::unique_ptr<LineSequence>> Build() {
+    return std::move(m_sequences);
+  }
+
+  void Entry(addr_t addr, bool terminal) {
+    LineTable::AppendLineEntryToSequence(
+        m_seq_up.get(), addr, /*line=*/1, /*column=*/0,
+        /*file_idx=*/0,
+        /*is_start_of_statement=*/false, /*is_start_of_basic_block=*/false,
+        /*is_prologue_end=*/false, /*is_epilogue_begin=*/false, terminal);
+    if (terminal) {
+      m_sequences.push_back(std::move(m_seq_up));
+      m_seq_up = LineTable::CreateLineSequenceContainer();
+    }
+  }
+
+private:
+  std::vector<std::unique_ptr<LineSequence>> m_sequences;
+  std::unique_ptr<LineSequence> m_seq_up =
+      LineTable::CreateLineSequenceContainer();
+};
+
+} // namespace
+
+char FakeSymbolFile::ID;
+
+static llvm::Expected<FakeModuleFixture>
+CreateFakeModule(std::vector<std::unique_ptr<LineSequence>> line_sequences) {
+  Expected<TestFile> file = TestFile::fromYaml(R"(
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_EXEC
+  Machine: EM_386
+Sections:
+  - Name:            .text
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    AddressAlign:    0x0010
+    Address:         0x0000
+    Size:            0x1000
+)");
+  if (!file)
+    return file.takeError();
+
+  auto module_sp = std::make_shared<Module>(file->moduleSpec());
+  SectionSP text_sp =
+      module_sp->GetSectionList()->FindSectionByName(ConstString(".text"));
+  if (!text_sp)
+    return createStringError("No .text");
+
+  auto cu_up = std::make_unique<CompileUnit>(module_sp, /*user_data=*/nullptr,
+                                             /*support_file_sp=*/nullptr,
+                                             /*uid=*/0, eLanguageTypeC,
+                                             /*is_optimized=*/eLazyBoolNo);
+  LineTable *line_table = new LineTable(cu_up.get(), std::move(line_sequences));
+  cu_up->SetLineTable(line_table);
+  cast<FakeSymbolFile>(module_sp->GetSymbolFile())
+      ->InjectCompileUnit(std::move(cu_up));
+
+  return FakeModuleFixture{std::move(*file), std::move(module_sp),
+                           std::move(text_sp), line_table};
+}
+
+TEST_F(LineTableTest, LowerAndUpperBound) {
+  LineSequenceBuilder builder;
+  builder.Entry(0, false);
+  builder.Entry(10, false);
+  builder.Entry(20, true);
+  builder.Entry(20, false); // Starts right after the previous sequence.
+  builder.Entry(30, true);
+  builder.Entry(40, false); // Gap after the previous sequence.
+  builder.Entry(50, true);
+
+  llvm::Expected<FakeModuleFixture> fixture = CreateFakeModule(builder.Build());
+  ASSERT_THAT_EXPECTED(fixture, llvm::Succeeded());
+
+  LineTable *table = fixture->line_table;
+
+  auto make_addr = [&](addr_t addr) { return Address(fixture->text_sp, addr); };
+
+  // Both functions return the same value for boundary values. This way the
+  // index range for e.g. [0,10) is [0,1).
+  EXPECT_EQ(table->lower_bound(make_addr(0)), 0u);
+  EXPECT_EQ(table->upper_bound(make_addr(0)), 0u);
+  EXPECT_EQ(table->lower_bound(make_addr(10)), 1u);
+  EXPECT_EQ(table->upper_bound(make_addr(10)), 1u);
+  EXPECT_EQ(table->lower_bound(make_addr(20)), 3u);
+  EXPECT_EQ(table->upper_bound(make_addr(20)), 3u);
+
+  // In case there's no "real" entry at this address, they return the first real
+  // entry.
+  EXPECT_EQ(table->lower_bound(make_addr(30)), 5u);
+  EXPECT_EQ(table->upper_bound(make_addr(30)), 5u);
+
+  EXPECT_EQ(table->lower_bound(make_addr(40)), 5u);
+  EXPECT_EQ(table->upper_bound(make_addr(40)), 5u);
+
+  // For in-between values, their result differs by one. [9,19) maps to [0,2)
+  // because the first two entries contain a part of that range.
+  EXPECT_EQ(table->lower_bound(make_addr(9)), 0u);
+  EXPECT_EQ(table->upper_bound(make_addr(9)), 1u);
+  EXPECT_EQ(table->lower_bound(make_addr(19)), 1u);
+  EXPECT_EQ(table->upper_bound(make_addr(19)), 2u);
+  EXPECT_EQ(table->lower_bound(make_addr(29)), 3u);
+  EXPECT_EQ(table->upper_bound(make_addr(29)), 4u);
+
+  // In a gap, they both return the first entry after the gap.
+  EXPECT_EQ(table->lower_bound(make_addr(39)), 5u);
+  EXPECT_EQ(table->upper_bound(make_addr(39)), 5u);
+
+  // And if there's no such entry, they return the size of the list.
+  EXPECT_EQ(table->lower_bound(make_addr(50)), table->GetSize());
+  EXPECT_EQ(table->upper_bound(make_addr(50)), table->GetSize());
+  EXPECT_EQ(table->lower_bound(make_addr(59)), table->GetSize());
+  EXPECT_EQ(table->upper_bound(make_addr(59)), table->GetSize());
+}
+
+TEST_F(LineTableTest, FindLineEntryByAddress) {
+  LineSequenceBuilder builder;
+  builder.Entry(0, false);
+  builder.Entry(10, false);
+  builder.Entry(20, true);
+  builder.Entry(20, false); // Starts right after the previous sequence.
+  builder.Entry(30, true);
+  builder.Entry(40, false); // Gap after the previous sequence.
+  builder.Entry(50, true);
+
+  llvm::Expected<FakeModuleFixture> fixture = CreateFakeModule(builder.Build());
+  ASSERT_THAT_EXPECTED(fixture, llvm::Succeeded());
+
+  LineTable *table = fixture->line_table;
+
+  auto find = [&](addr_t addr) -> std::tuple<addr_t, addr_t, bool> {
+    LineEntry entry;
+    if (!table->FindLineEntryByAddress(Address(fixture->text_sp, addr), entry))
+      return {LLDB_INVALID_ADDRESS, LLDB_INVALID_ADDRESS, false};
+    return {entry.range.GetBaseAddress().GetFileAddress(),
+            entry.range.GetByteSize(),
+            static_cast<bool>(entry.is_terminal_entry)};
+  };
+
+  EXPECT_THAT(find(0), testing::FieldsAre(0, 10, false));
+  EXPECT_THAT(find(9), testing::FieldsAre(0, 10, false));
+  EXPECT_THAT(find(10), testing::FieldsAre(10, 10, false));
+  EXPECT_THAT(find(19), testing::FieldsAre(10, 10, false));
+  EXPECT_THAT(find(20), testing::FieldsAre(20, 10, false));
+  EXPECT_THAT(find(30), testing::FieldsAre(LLDB_INVALID_ADDRESS,
+                                           LLDB_INVALID_ADDRESS, false));
+  EXPECT_THAT(find(40), testing::FieldsAre(40, 10, false));
+  EXPECT_THAT(find(50), testing::FieldsAre(LLDB_INVALID_ADDRESS,
+                                           LLDB_INVALID_ADDRESS, false));
+}

>From bfe8f5c7338a24420223b794559d4c842d14d190 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel at labath.sk>
Date: Tue, 18 Feb 2025 11:24:36 +0100
Subject: [PATCH 2/2] Apply suggestions from code review

Co-authored-by: Jonas Devlieghere <jonas at devlieghere.com>
---
 lldb/unittests/Symbol/LineTableTest.cpp | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/lldb/unittests/Symbol/LineTableTest.cpp b/lldb/unittests/Symbol/LineTableTest.cpp
index 50890d0ed795c..314fc09b73ed4 100644
--- a/lldb/unittests/Symbol/LineTableTest.cpp
+++ b/lldb/unittests/Symbol/LineTableTest.cpp
@@ -132,8 +132,10 @@ class LineSequenceBuilder {
   std::vector<std::unique_ptr<LineSequence>> Build() {
     return std::move(m_sequences);
   }
-
-  void Entry(addr_t addr, bool terminal) {
+  /// Tag for Entry()'s \c terminal argument, so that call sites do not have to
+  /// pass a bare \c true.
+  enum Terminal : bool { Terminal = true };
+  void Entry(addr_t addr, bool terminal = false) {
     LineTable::AppendLineEntryToSequence(
         m_seq_up.get(), addr, /*line=*/1, /*column=*/0,
         /*file_idx=*/0,
@@ -196,13 +198,13 @@ CreateFakeModule(std::vector<std::unique_ptr<LineSequence>> line_sequences) {
 
 TEST_F(LineTableTest, LowerAndUpperBound) {
   LineSequenceBuilder builder;
-  builder.Entry(0, false);
-  builder.Entry(10, false);
-  builder.Entry(20, true);
-  builder.Entry(20, false); // Starts right after the previous sequence.
-  builder.Entry(30, true);
-  builder.Entry(40, false); // Gap after the previous sequence.
-  builder.Entry(50, true);
+  builder.Entry(0);
+  builder.Entry(10);
+  builder.Entry(20, LineSequenceBuilder::Terminal);
+  builder.Entry(20); // Starts right after the previous sequence.
+  builder.Entry(30, LineSequenceBuilder::Terminal);
+  builder.Entry(40); // Gap after the previous sequence.
+  builder.Entry(50, LineSequenceBuilder::Terminal);
 
   llvm::Expected<FakeModuleFixture> fixture = CreateFakeModule(builder.Build());
   ASSERT_THAT_EXPECTED(fixture, llvm::Succeeded());



More information about the lldb-commits mailing list